diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
--- a/mlir/docs/Bufferization.md
+++ b/mlir/docs/Bufferization.md
@@ -139,7 +139,7 @@
 ```mlir
 #map = affine_map<(i) -> (i)>
 %0 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]}
-    outs(%t : tensor<?xf32>) {
+    inits(%t : tensor<?xf32>) {
   ^bb0(%arg0 : f32):
     %cst = arith.constant 0.0 : f32
     linalg.yield %cst : f32
@@ -153,7 +153,7 @@
 ```mlir
 %t = tensor.extract_slice %s [%idx] [%sz] [1] : tensor<?xf32> to tensor<?xf32>
-%0 = linalg.generic ... outs(%t) { ... } -> tensor<?xf32>
+%0 = linalg.generic ... inits(%t) { ... } -> tensor<?xf32>
 %1 = tensor.insert_slice %0 into %s [%idx] [%sz] [1]
     : tensor<?xf32> into tensor<?xf32>
 ```
diff --git a/mlir/docs/Dialects/Linalg/_index.md b/mlir/docs/Dialects/Linalg/_index.md
--- a/mlir/docs/Dialects/Linalg/_index.md
+++ b/mlir/docs/Dialects/Linalg/_index.md
@@ -103,7 +103,7 @@
               %B: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
   linalg.generic #attrs
     ins(%A: memref<?xf32, strided<[1]>>)
-    outs(%B: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
+    inits(%B: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
   ^bb0(%a: f32, %b: vector<4xf32>):
     %c = "some_compute"(%a, %b): (f32, vector<4xf32>) -> (vector<4xf32>)
     linalg.yield %c: vector<4xf32>
@@ -189,7 +189,7 @@
               %B: memref<vector<4xf32>>) {
   linalg.generic #attrs
     ins(%A: memref<8x?xf32, strided<[2, 2], offset: 0>>)
-    outs(%B: memref<vector<4xf32>>) {
+    inits(%B: memref<vector<4xf32>>) {
   ^bb0(%a: f32, %b: vector<4xf32>):
     %c = "some_compute"(%a, %b): (f32, vector<4xf32>) -> (vector<4xf32>)
     linalg.yield %c: vector<4xf32>
@@ -310,7 +310,7 @@
 func.func @example(%A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
   linalg.generic #attrs
     ins(%A, %B: memref<?xf32>, memref<?xf32>)
-    outs(%C: memref<?xf32>) {
+    inits(%C: memref<?xf32>) {
   ^bb0(%a: f32, %b: f32, %c: f32):
     %d = arith.addf %a, %b : f32
     linalg.yield %d : f32
@@ -383,7 +383,7 @@
 func.func @example(%A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
   linalg.generic #attrs
     ins(%A, %B: memref<?xf32>, memref<?xf32>)
-    outs(%C: memref<?xf32>) {
+    inits(%C: memref<?xf32>) {
   ^bb0(%a: f32, %b: f32, %c: f32):
     %d = arith.addf %a, %b : f32
     linalg.yield %d : f32
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -68,7 +68,7 @@
     %c = bufferization.alloc_tensor(%d1, %d2) : tensor<?x?xf32>
     %0 = linalg.matmul
       ins(%a, %b: tensor<?x?xf32>, tensor<?x?xf32>)
-      outs(%c: tensor<?x?xf32>) -> tensor<?x?xf32>
+      inits(%c: tensor<?x?xf32>) -> tensor<?x?xf32>
     return %0 : tensor<?x?xf32>
     ```
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -59,7 +59,7 @@
       #map = affine_map<(i, j) -> (i, j)>
       linalg.generic {indexing_maps = [#map, #map],
                       iterator_types = ["parallel", "parallel"]}
-          outs(%I, %J : memref<?x?xindex>, memref<?x?xindex>) {
+          inits(%I, %J : memref<?x?xindex>, memref<?x?xindex>) {
         ^bb0(%arg0 : index, %arg1 : index):
           // Access the outer iteration dimension i
           %i = linalg.index 0 : index
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -71,7 +71,7 @@
       linalg.generic #trait_attribute
         ins(%A, %B : memref<?x?xf32, stride_specification>,
                      memref<?x?xf32, stride_specification>)
-        outs(%C : memref<?x?xf32, stride_specification>)
+        inits(%C : memref<?x?xf32, stride_specification>)
         attrs = {other-optional-attributes}
         {region}
       ```
@@ -112,7 +112,7 @@
       linalg.generic #matmul_trait
         ins(%A, %B : memref<?x?xf32, stride_specification>,
                      memref<?x?xf32, stride_specification>)
-        outs(%C : memref<?x?xf32, stride_specification>)
+        inits(%C : memref<?x?xf32, stride_specification>)
        {other-optional-attributes} {
        ^bb0(%a: f32, %b: f32, %c: f32) :
          %d = arith.mulf %a, %b: f32
@@ -153,7 +153,7 @@
     ```mlir
     %C = linalg.generic #trait_attribute
            ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>)
-           outs(%C : tensor<?x?xf32>)
+           inits(%C : tensor<?x?xf32>)
            {other-optional-attributes}
            {region}
            -> (tensor<?x?xf32>)
@@ -249,7 +249,7 @@
       ```
       %add = linalg.map
           ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
-          outs(%init: tensor<64xf32>)
+          inits(%init: tensor<64xf32>)
           (%lhs_elem: f32, %rhs_elem: f32) {
             %0 = arith.addf %lhs_elem, %rhs_elem: f32
             linalg.yield %0: f32
@@ -263,7 +263,7 @@
       ```
       %add = linalg.map { arith.addf }
           ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
-          outs(%init: tensor<64xf32>)
+          inits(%init: tensor<64xf32>)
       ```
   }];
@@ -336,7 +336,7 @@
       ```
       %reduce = linalg.reduce
           ins(%input:tensor<16x32x64xf32>)
-          outs(%init:tensor<16x64xf32>)
+          inits(%init:tensor<16x64xf32>)
           dimensions = [1]
           (%in: f32, %out: f32) {
             %0 = arith.addf %out, %in: f32
@@ -352,7 +352,7 @@
       ```
       %reduce = linalg.reduce { arith.addf }
           ins(%input:tensor<16x32x64xf32>)
-          outs(%init:tensor<16x64xf32>)
+          inits(%init:tensor<16x64xf32>)
           dimensions = [1]
       ```
   }];
@@ -420,7 +420,7 @@
       ```
       %transpose = linalg.transpose
           ins(%input:tensor<16x64xf32>)
-          outs(%init:tensor<64x16xf32>)
+          inits(%init:tensor<64x16xf32>)
           permutation = [1, 0]
       ```
   }];
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -520,7 +520,7 @@
    //                              M   N
    // affine_map<(d0, d1, d2) -> (d0, d1)>
    %0 = linalg.matmul ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
-                     outs( %C: tensor<?x?xf32>)
+                     inits( %C: tensor<?x?xf32>)
    ```

    Specifying packed_sizes [2, 3, 4] results in tiling the iterator dimensions
@@ -534,7 +534,7 @@
    // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
    %0 = linalg.generic_representing_some_higher_d_matmul
          ins(%A, %B: tensor<?x?x2x4xf32>, tensor<?x?x3x4xf32>)
-        outs( %C: tensor<?x?x2x3xf32>)
+        inits( %C: tensor<?x?x2x3xf32>)
    ```
    In particular, note that the second operand `B` has shape `KxNxnxk` (and
    not `KxNxkxn` as one could expect by looking **only** at the operand).
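Note: across all of the hunks above and below, the rename is purely a
surface-syntax change. A minimal sketch of the intended transition behavior
(the `@rename_example` name and the fixed shapes are illustrative, not taken
from this patch); per the `LinalgOps.cpp` parser change further down, both
spellings parse to the same op and the printer emits only `inits`:

```mlir
// Hypothetical example (not from this patch): both ops below parse to the
// same linalg.matmul after this change. `outs` is deprecated but still
// accepted by the parser; printing round-trips both forms to `inits`.
func.func @rename_example(%a: tensor<4x8xf32>, %b: tensor<8x16xf32>,
                          %c: tensor<4x16xf32>) -> tensor<4x16xf32> {
  // New spelling introduced by this patch:
  %0 = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x16xf32>)
                     inits(%c : tensor<4x16xf32>) -> tensor<4x16xf32>
  // Deprecated spelling, still parsed during the transition:
  %1 = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x16xf32>)
                     outs(%0 : tensor<4x16xf32>) -> tensor<4x16xf32>
  return %1 : tensor<4x16xf32>
}
```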
@@ -1052,7 +1052,7 @@
                                         affine_map<(d0) -> ()>],
                        iterator_types = ["reduction"]}
     ins(%in : tensor<32xf32>)
-    outs(%out : tensor<f32>) {
+    inits(%out : tensor<f32>) {
     ^bb0(%arg1: f32, %arg2: f32):
       %y = arith.addf %arg1, %arg2 : f32
       linalg.yield %y : f32
@@ -1065,11 +1065,11 @@
   %cst = arith.constant 0.000000e+00 : f32
   %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
   %1 = tensor.empty() : tensor<4xf32>
-  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
+  %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32>
   %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0)>],
                        iterator_types = ["parallel", "reduction"]}
-    ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {
+    ins(%0 : tensor<4x8xf32>) inits(%2 : tensor<4xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):
       %5 = arith.addf %arg3, %arg4 : f32
       linalg.yield %5 : f32
@@ -1077,7 +1077,7 @@
   %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> ()>],
                        iterator_types = ["reduction"]}
-    ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) {
+    ins(%3 : tensor<4xf32>) inits(%out : tensor<f32>) {
     ^bb0(%arg3: f32, %arg4: f32):
       %5 = arith.addf %arg3, %arg4 : f32
       linalg.yield %5 : f32
@@ -1103,7 +1103,7 @@
   ```
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                    outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                    inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   ```

   Is transformed to:
@@ -1117,14 +1117,14 @@
   #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
   %0 = tensor.empty() : tensor<16x32x64xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<16x32x64xf32>) ->
      tensor<16x32x64xf32>
   %2 = tensor.empty() : tensor<64x4xi1>
   %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
     iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
     ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-    outs(%1 : tensor<16x32x64xf32>) {
+    inits(%1 : tensor<16x32x64xf32>) {
     ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
       %5 = arith.mulf %arg3, %arg4 : f32
       %6 = arith.addf %arg6, %5 : f32
@@ -1134,7 +1134,7 @@
   %4 = linalg.generic {indexing_maps = [#map4, #map5],
     iterator_types = ["parallel", "parallel", "reduction"]}
     ins(%3 : tensor<16x32x64xf32>)
-    outs(%C : tensor<16x32xf32>) {
+    inits(%C : tensor<16x32xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):
       %5 = arith.addf %arg3, %arg4 : f32
       linalg.yield %5 : f32
@@ -1209,7 +1209,7 @@
                                         affine_map<(d0, d1) -> (d0)>],
                        iterator_types = ["parallel", "reduction"]}
     ins(%arg0 : tensor<?x?xf32>)
-    outs(%out : tensor<?xf32>) {
+    inits(%out : tensor<?xf32>) {
     ^bb0(%arg7: f32, %arg9: f32):
       %1 = arith.addf %arg7, %arg9 : f32
       linalg.yield %1 : f32
@@ -1221,7 +1221,7 @@
   ```
   %0 = tensor.empty(%dim_1) : tensor<?x5xf32>
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
   %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor<?x5xf32>) {
     %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor<?x5xf32> to tensor<?x5xf32>
     %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor<?x?xf32> to tensor<?x5xf32>
@@ -1229,7 +1229,7 @@
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"]}
     ins(%extracted_slice_2 : tensor<?x5xf32>)
-    outs(%extracted_slice : tensor<?x5xf32>) {
+    inits(%extracted_slice : tensor<?x5xf32>) {
     ^bb0(%in: f32, %out: f32):
       %5 = arith.addf %in, %out : f32
       linalg.yield %5 : f32
@@ -1242,7 +1242,7 @@
affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor) - outs(%arg1 : tensor) { + inits(%arg1 : tensor) { ^bb0(%in: f32, %out: f32): %4 = arith.addf %in, %out : f32 linalg.yield %4 : f32 @@ -1314,7 +1314,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.addf %arg7, %arg9 : f32 linalg.yield %1 : f32 @@ -1326,7 +1326,7 @@ ``` %0 = tensor.empty(%dim_1) : tensor - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor %2 = scf.forall (%arg2) in (%c5) shared_outs(%arg3 = %1) -> (tensor) { %4 = affine.min #map(%arg2)[%dim_0] %5 = affine.max #map1(%4) @@ -1334,7 +1334,7 @@ %6 = affine.apply #map2(%arg2)[%dim_0] %extracted_slice_2 = tensor.extract_slice %arg0[0, %6] [%dim, %5] [1, 1] : tensor to tensor %extracted_slice_3 = tensor.extract_slice %extracted_slice[0] [%dim] [1] : tensor to tensor - %7 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%extracted_slice_2 : tensor) outs(%extracted_slice_3 : tensor) { + %7 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%extracted_slice_2 : tensor) inits(%extracted_slice_3 : tensor) { ^bb0(%in: f32, %out: f32): %9 = arith.addf %in, %out : f32 linalg.yield %9 : f32 @@ -1343,7 +1343,7 @@ tensor.parallel_insert_slice %7 into %arg3[0, %arg2] [%dim, 1] [1, 1] : tensor into tensor } } {mapping = []} - %3 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor) outs(%arg1 : tensor) { + %3 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor) inits(%arg1 : tensor) { ^bb0(%in: f32, %out: f32): %4 = arith.addf %in, %out : f32 linalg.yield %4 : f32 diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -310,7 +310,7 @@ /// is lowered to: /// /// %alloc = memref.alloc -/// linalg.fill ... outs(%alloc) +/// linalg.fill ... inits(%alloc) /// %subview = memref.subview %alloc [%l] [...] 
[1] /// memref.tensor_store %t, %subview /// %0 = bufferization.to_tensor %alloc restrict writable @@ -437,20 +437,20 @@ /// For example, the following op: /// /// linalg.matmul ins(%0, %1 : tensor<128x32xf32>, tensor<32x64xf32>) -/// outs(%2 : tensor<128x64xf32>) +/// inits(%2 : tensor<128x64xf32>) /// /// split along the first dimension at position 42 will result in: /// /// %3 = tensor.extract_slice %0[0, 0][42, 32][1, 1] /// %4 = tensor.extract_slice %2[0, 0][42, 64][1, 1] /// %5 = linalg.matmul ins(%3, %1 : tensor<42x32xf32>, tensor<32x64xf32>) -/// outs(%5 : tensor<42x64xf32>) +/// inits(%5 : tensor<42x64xf32>) /// %6 = tensor.insert_slice %5 into %2[0, 0][42, 64][1, 1] /// /// %7 = tensor.extract_slice %0[42, 0][86, 32][1, 1] /// %8 = tensor.extract_slice %6[42, 0][86, 64][1, 1] /// %9 = linalg.matmul ins(%7, %1 : tensor<86x32xf32>, tensor<32x64xf32>) -/// outs(%8 : tensor<86x64xf32>) +/// inits(%8 : tensor<86x64xf32>) /// tensor.insert_slice %5 into %6[42, 0][86, 64][1, 1] /// /// Note that there is no simplification other than constant propagation applied @@ -782,7 +782,7 @@ /// affine_map<(d0) -> ()>], /// iterator_types = ["reduction"]} /// ins(%in : tensor<32xf32>) -/// outs(%out : tensor) { +/// inits(%out : tensor) { /// ^bb0(%arg1: f32, %arg2: f32): /// %y = arith.addf %arg1, %arg2 : f32 /// linalg.yield %y : f32 @@ -793,11 +793,11 @@ /// %cst = arith.constant 0.000000e+00 : f32 /// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into /// tensor<4x8xf32> %1 = tensor.empty [4] : tensor<4xf32> %2 = linalg.fill -/// ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> %3 = +/// ins(%cst : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32> %3 = /// linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, /// affine_map<(d0, d1) -> (d0)>], /// iterator_types = ["parallel", "reduction"]} -/// ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) { +/// ins(%0 : tensor<4x8xf32>) inits(%2 : tensor<4xf32>) { /// ^bb0(%arg3: f32, %arg5: f32): /// %5 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %5 : f32 @@ -805,7 +805,7 @@ /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, /// affine_map<(d0) -> ()>], /// iterator_types = ["reduction"]} -/// ins(%3 : tensor<4xf32>) outs(%out : tensor) { +/// ins(%3 : tensor<4xf32>) inits(%out : tensor) { /// ^bb0(%arg3: f32, %arg4: f32): /// %5 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %5 : f32 @@ -829,7 +829,7 @@ /// Example: /// ``` /// %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>) -/// outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> +/// inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> /// ``` /// /// Is transformed to: @@ -843,7 +843,7 @@ /// #map5 = affine_map<(d0, d1, d2) -> (d0, d1)> /// %0 = tensor.empty [16, 32, 64] : tensor<16x32x64xf32> /// %cst = arith.constant 0.000000e+00 : f32 -/// %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) -> +/// %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<16x32x64xf32>) -> /// tensor<16x32x64xf32> /// %2 = tensor.empty [64, 4] : tensor<64x4xi1> /// @@ -851,7 +851,7 @@ /// iterator_types = ["parallel", "parallel", "parallel", "reduction"]} /// ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, /// tensor<64x4xi1>) -/// outs(%1 : tensor<16x32x64xf32>) { +/// inits(%1 : tensor<16x32x64xf32>) { /// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32): /// %5 = arith.mulf %arg3, %arg4 : f32 /// %6 = arith.addf %arg6, %5 : f32 @@ -861,7 +861,7 @@ /// %4 = linalg.generic {indexing_maps = [#map4, #map5], /// iterator_types 
= ["parallel", "parallel", "reduction"]} // ins(%3 : tensor<16x32x64xf32>) -/// outs(%C : tensor<16x32xf32>) { +/// inits(%C : tensor<16x32xf32>) { /// ^bb0(%arg3: f32, %arg4: f32): /// %5 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %5 : f32 diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -104,7 +104,7 @@ /// ``` /// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1] /// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst } -/// %2 = linalg.matmul ins(...) outs(%1) +/// %2 = linalg.matmul ins(...) inits(%1) /// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1] /// ``` /// makeComposedPadHighOp(source=%3, pad=%cst) returns %2 diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -434,11 +434,11 @@ tensor to tensor %sC = tensor.extract_slice %o1[h((%thread_id_1, %thread_id_2))]: tensor to tensor - %sD = matmul ins(%sA, %sB) outs(%sC) + %sD = matmul ins(%sA, %sB) inits(%sC) %spointwise = subtensor %o2[i((%thread_id_1, %thread_id_2))]: tensor to tensor - %sE = add ins(%spointwise) outs(%sD) + %sE = add ins(%spointwise) inits(%sD) scf.forall.in_parallel { scf.forall.parallel_insert_slice %sD into %o1[h((%thread_id_1, %thread_id_2))]: @@ -472,7 +472,7 @@ %sC = tensor.extract_slice %o[%i, %j][%tileSize1, %tileSize2][1, 1] : tensor to tensor - %add = map {"arith.addf"} ins(%sA, %sB) outs(%sC) + %add = map {"arith.addf"} ins(%sA, %sB) inits(%sC) scf.forall.in_parallel { scf.forall.parallel_insert_slice %add into diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -113,8 +113,8 @@ /// the tiled loop nest. For example, consider /// /// ```mlir -/// %0 = linalg.matmul ins(...) outs(...) -> tensor -/// %1 = linalg.matmul ins(%0, ..) outs(...) -> tensor +/// %0 = linalg.matmul ins(...) inits(...) -> tensor +/// %1 = linalg.matmul ins(%0, ..) inits(...) -> tensor /// ``` /// /// If `%1` is tiled in a 2D fashion and `%0` is fused with it, the resulting IR @@ -124,7 +124,7 @@ /// %t1_0 = scf.for .... iter_args(%arg0 = ...) { /// %t1_1 = scf.for ... iter_args(%arg1 = %arg0) { /// ... -/// %t1_2 = linalg.matmul ins(...) outs(...) -> tensor +/// %t1_2 = linalg.matmul ins(...) inits(...) -> tensor /// %t1_3 = linalg.matmul ins(%t1_2, ...) /// %t1_4 = tensor.insert_slice %t1_3 into %arg1 ... /// scf.yield %t1_4 @@ -139,7 +139,7 @@ /// ```mlir /// %t2_0 = scf.for .... iter_args(%arg0 = ...) { /// ... -/// %t2_1 = linalg.matmul ins(...) outs(...) -> tensor +/// %t2_1 = linalg.matmul ins(...) inits(...) -> tensor /// %t2_2 = linalg.matmul ins(%t2_1, ...) /// %t2_3 = tensor.insert_slice %t2_2 into %arg0 ... /// scf.yield %t2_3 @@ -180,8 +180,8 @@ /// /// ```mlir /// %0 = -/// %1 = linalg.fill ... outs(%0 : ... ) -/// %2 = linalg.matmul ... outs(%1 : ...) ... +/// %1 = linalg.fill ... inits(%0 : ... ) +/// %2 = linalg.matmul ... inits(%1 : ...) ... /// ``` /// /// it is legal to fuse the fill with the matmul only if the matmul is tiled @@ -192,8 +192,8 @@ /// ```mlir /// %1 = scf.for ... 
iter_args(%arg0 = %0) /// %2 = tensor.extract_slice %arg0 -/// %3 = linalg.fill .. outs(%2 : ... ) -/// %4 = linalg.matmul .. outs(%3 : ...) +/// %3 = linalg.fill .. inits(%2 : ... ) +/// %4 = linalg.matmul .. inits(%3 : ...) /// } /// ``` FailureOr diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -889,7 +889,7 @@ %0 = linalg.generic #trait ins(%A: tensor, %B: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %b: f64, %c: i8) : %result = sparse_tensor.binary %a, %b : f64, f64 to i8 overlap={ @@ -910,7 +910,7 @@ %C = bufferization.alloc_tensor... %1 = linalg.generic #trait ins(%A: tensor, %B: tensor - outs(%C: tensor { + inits(%C: tensor { ^bb0(%a: f64, %b: f64, %c: f64) : %row = linalg.index 0 : index %col = linalg.index 1 : index @@ -943,7 +943,7 @@ %C = bufferization.alloc_tensor... %2 = linalg.generic #trait ins(%A: tensor, %B: tensor - outs(%C: tensor { + inits(%C: tensor { ^bb0(%a: f64, %b: i32, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, i32 to f64 overlap={} @@ -991,7 +991,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %result = sparse_tensor.unary %a : f64 to f64 present={ @@ -1069,7 +1069,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %result = sparse_tensor.reduce %c, %a, %cf1 : f64 { ^bb0(%arg0: f64, %arg1: f64): @@ -1112,7 +1112,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %result = sparse_tensor.select %a : f64 { ^bb0(%arg0: f64): @@ -1130,7 +1130,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %row = linalg.index 0 : index %col = linalg.index 1 : index diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -58,7 +58,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #matvec ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp @@ -107,7 +107,7 @@ /// /// E.g.: /// %0 = tensor.empty() : tensor<10xf32> -/// %1 = linalg.fill ... outs(%0 : tensor<10xf32>) +/// %1 = linalg.fill ... inits(%0 : tensor<10xf32>) /// %2 = tensor.insert_slice %0 into %t ... 
///
/// In the above example, the anchor is the source operand of the insert_slice
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -145,7 +145,9 @@
     return failure();
   }

-  if (succeeded(parser.parseOptionalKeyword("outs"))) {
+  // outs is deprecated.
+  if (succeeded(parser.parseOptionalKeyword("inits")) ||
+      succeeded(parser.parseOptionalKeyword("outs"))) {
     outputsOperandsLoc = parser.getCurrentLocation();
     if (parser.parseLParen() || parser.parseOperandList(outputsOperands) ||
         parser.parseColonTypeList(outputTypes) || parser.parseRParen())
@@ -172,7 +174,7 @@
   if (!inputs.empty())
     p << " ins(" << inputs << " : " << inputs.getTypes() << ")";
   if (!outputs.empty())
-    p << " outs(" << outputs << " : " << outputs.getTypes() << ")";
+    p << " inits(" << outputs << " : " << outputs.getTypes() << ")";
 }

 //===----------------------------------------------------------------------===//
@@ -960,7 +962,7 @@
     OpAsmParser &parser, OperationState &result,
     function_ref<ParseResult(OpAsmParser &, NamedAttrList &)> parseAttrsFn =
         nullptr) {
-  // Parse `ins` and `outs`.
+  // Parse `ins` and `inits`.
   SmallVector<Type, 4> inputTypes, outputTypes;
   if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes,
                                    /*addOperandSegmentSizes=*/false))
@@ -1888,7 +1890,7 @@
     OpResult resultValue = castOp.getSource().cast<OpResult>();
     unsigned resultNumber = resultValue.getResultNumber();
     auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>();
-    // Replace the `outs` for the result with a `tensor.cast`. This cast is now
+    // Replace the `inits` for the result with a `tensor.cast`. This cast is now
     // going from a more dynamic shape to a less dynamic shape. If the producer
     // for this cast, i.e.
producer of the out operand, is also an operation // that folds with tensor.cast consumer (like this pattern), the cast will diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -130,7 +130,7 @@ /// %0 = linalg.generic {indexing_maps = [#map1, #map2, #map0], /// iterator_types = ["parallel", "parallel"]} /// ins(%arg0, %arg1 : tensor, tensor) -/// outs(%init : tensor) { +/// inits(%init : tensor) { /// ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): /// %4 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %4 : f32 @@ -263,7 +263,7 @@ /// %3 = linalg.generic {indexing_maps = [#map0, #map0], /// iterator_types = ["parallel", "parallel"]} /// ins(%arg0 : tensor) -/// outs(%2 : tensor) { +/// inits(%2 : tensor) { /// ^bb0(%arg3: f32, %arg4: f32): /// %4 = arith.addf %arg3, %arg3 : f32 /// linalg.yield %4 : f32 @@ -290,7 +290,7 @@ /// %3 = linalg.generic {indexing_maps = [#map2, #map2], /// iterator_types = ["parallel", "parallel", "parallel", "parallel"]} /// ins(%pack : tensor) -/// outs(%arg1 : tensor) { +/// inits(%arg1 : tensor) { /// ^bb0(%in: f32, %out: f32): /// %4 = arith.addf %in, %in : f32 /// linalg.yield %4 : f32 @@ -410,7 +410,7 @@ /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// %2 = linalg.generic {indexing_maps = [#map], /// iterator_types = ["parallel", "parallel", "parallel", "parallel"]} -/// outs(%1 : tensor<12x56x56x64xf32>) { +/// inits(%1 : tensor<12x56x56x64xf32>) { /// ^bb0(%out : f32): /// linalg.yield %out : f32 /// } -> tensor<12x56x56x64xf32> @@ -423,7 +423,7 @@ /// %1 = linalg.generic {indexing_maps = [#map], /// iterator_types = ["parallel", "parallel", "parallel", /// "parallel", "parallel"]} -/// outs(%arg0 : tensor<12x2x56x56x32xf32>) { +/// inits(%arg0 : tensor<12x2x56x56x32xf32>) { /// ^bb0(%out : f32): /// linalg.yield %out : f32 /// } -> tensor<12x2x56x56x32xf32> diff --git a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @@ -35,7 +35,7 @@ /// /// ```mlir /// %result:2 = linalg.generic ... ins(%arg0, %arg1, %arg2 : ...) -/// outs(%init0, %init1 : ...) { +/// inits(%init0, %init1 : ...) { /// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ...): /// %0 = %b0, %b1 : ... /// %1 = %0, %b2 : ... @@ -49,13 +49,13 @@ /// ```mlir /// %init = tensor.empty ... /// %op0:3 = linalg.generic ... ins(%arg0, %arg1, %arg2 : ...) -/// outs(%init0, %init1, %init : ...) +/// inits(%init0, %init1, %init : ...) /// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ..., %b5: ...): /// %0 = %b0, %b1 : ... /// linalg.yield %0, %..., %0 : ... /// } -> (..., ..., ...) /// %op1:2 = linalg.generic ... ins(%arg0, %arg1, %arg2, %op0#2 : ...) -/// outs(%init0, %init1 : ...) { +/// inits(%init0, %init1 : ...) { /// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ..., %b5: ...): /// %1 = %b3, %b2 : ... /// linalg.yield %..., %1 : ... @@ -68,13 +68,13 @@ /// ```mlir /// %init = tensor.empty ... /// %op0 = linalg.generic ... ins(%arg0, %arg1, : ...) -/// outs(%init : ...) +/// inits(%init : ...) /// ^bb0(%b0: ... , %b1: ... , %b2: ...): /// %0 = %b0, %b1 : ... /// linalg.yield %0 : ... /// } -> ... /// %op1 = linalg.generic ... ins(%arg2, %op0#2 : ...) 
-/// outs(%init1 : ...) { +/// inits(%init1 : ...) { /// ^bb0(%b0: ... , %b1: ... , %b2: ...): /// %1 = %b1, %b0 : ... /// linalg.yield %..., %1 : ... diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -190,7 +190,7 @@ /// %7 = tensor.empty() : tensor /// %8 = linalg.generic #attrs /// ins(%6, %6 : tensor, tensor) - /// outs(%7 : tensor) { + /// inits(%7 : tensor) { /// ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): /// %9 = arith.addi %arg0, %arg1 : i32 /// linalg.yield %9 : i32 diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -241,12 +241,12 @@ /// %0 = tensor.empty() : tensor<1x1xf32> /// %1 = linalg.fill /// ins(%cst : f32) -/// outs(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> +/// inits(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> /// %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (0, d0, 0, 0)>, /// affine_map<(d0) -> (0, d0)>], /// iterator_types = ["parallel"]} /// ins(%arg0 : tensor<1x?x1x1xf32>) -/// outs(%1 : tensor<1x1xf32>) { +/// inits(%1 : tensor<1x1xf32>) { /// ^bb0(%in: f32, %out: f32): /// %3 = arith.addf %in, %out : f32 /// linalg.yield %3 : f32 @@ -257,14 +257,14 @@ /// %0 = tensor.empty() : tensor<1x1xf32> /// %1 = linalg.fill /// ins(%cst : f32) -/// outs(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> +/// inits(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> /// %2 = tensor.empty() : tensor<1x1xf32> /// %3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (0, d0, 0, 0)>, /// affine_map<(d0) -> (0, d0)>, /// affine_map<(d0) -> (0, d0)>], /// iterator_types = ["parallel"]} /// ins(%arg0, %1 : tensor<1x?x1x1xf32>, tensor<1x1xf32>) -/// outs(%2 : tensor<1x1xf32>) { +/// inits(%2 : tensor<1x1xf32>) { /// ^bb0(%in: f32, %in_0: f32, %out: f32): /// %4 = arith.addf %in, %in_0 : f32 /// linalg.yield %4 : f32 diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -1059,7 +1059,7 @@ // %3 = linalg.generic { // indexing_maps = [#map, #map], // iterator_types = ["parallel" ,"parallel"]} -// ins(%1 : tensor) outs(%2 : tensor) {.. } +// ins(%1 : tensor) inits(%2 : tensor) {.. } // ``` // // can be fused by collapsing the dimensions of the iteration space. @@ -1070,7 +1070,7 @@ // %3 = linalg.generic { // indexing_maps = [#map, #map], // iterator_types = ["parallel"]} -// ins(%1 : tensor) outs(%2 : tensor) {.. } +// ins(%1 : tensor) inits(%2 : tensor) {.. } // %4 = tensor.expand_shape %3 [[0, 1]] : tensor into tensor // ``` // @@ -1084,7 +1084,7 @@ // %2 = linalg.generic { // indexing_maps = [#map0, #map1], // iterator_types = ["parallel" ,"parallel"]} -// ins(%1 : tensor) outs(%2 : tensor<4x?xf32>) {.. } +// ins(%1 : tensor) inits(%2 : tensor<4x?xf32>) {.. 
} // ``` // // the reshape cannot be fused with the generic op by collapsing the op diff --git a/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp b/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp @@ -361,7 +361,7 @@ /// Fold uses of duplicate inputs in the body of a linalg.generic. E.g.: /// ``` -/// linalg.generic ins(%a, %b, %a, %b) outs(%a) +/// linalg.generic ins(%a, %b, %a, %b) inits(%a) /// ^bb0(%in0, %in1, %in2, %in3, %out1) /// ``` /// Assuming that all %a and %b have the same index map: diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -100,15 +100,15 @@ /// along the `tiledSliceDimIndices` and clone the producer. Consider the case /// of fusion of an output tensor: /// ``` -/// %1 = producer ins(...) outs(%0) -/// %2 = consumer ins(...) outs(%1) +/// %1 = producer ins(...) inits(%0) +/// %2 = consumer ins(...) inits(%1) /// ``` /// When consumer is tiled, %1 appears in the loop iter_args: /// ``` -/// %1 = producer ins(...) outs(%0) +/// %1 = producer ins(...) inits(%0) /// %2 = scf.for ... iter_args(%1) .. (%bbarg) { /// %t1 = tensor.extract_slice %bbarg[..] -/// %t2 = consumer ins(...) outs(%t1) +/// %t2 = consumer ins(...) inits(%t1) /// %r = tensor.insert_slice %t2, %bbarg[...] /// } /// ``` @@ -116,8 +116,8 @@ /// ``` /// %2 = scf.for ... iter_args(%0) .. (%bbarg) { /// %t0 = tensor.extract_slice %bbarg[..] -/// %t1 = producer ins(...) outs(%t0) -/// %t2 = consumer ins(...) outs(%t1) +/// %t1 = producer ins(...) inits(%t0) +/// %t2 = consumer ins(...) inits(%t1) /// %r = tensor.insert_slice %t2, %bbarg[...] /// } /// ``` diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -423,7 +423,7 @@ /// indexing_maps = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3)> /// } /// ins(%0 : tensor<2x3x4xf32>) -/// outs(%1 : tensor<5x6xf32>) +/// inits(%1 : tensor<5x6xf32>) /// ``` /// /// the iteration domain size of the linalg op is 3x5x4x6x2. The first affine diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -534,11 +534,11 @@ // // ```mlir // %0 = linalg.init - // %1 = linalg.fill .. outs(%0 : ) + // %1 = linalg.fill .. inits(%0 : ) // %2 = scf.for .. iter_args(%arg0 = %1) { // %3 = scf.for .. iter_args(%arg1 = %arg0) { // %4 = tensor.extract_slice %arg1 [..] - // .. = linalg.matmul .. outs(%4 : ) + // .. = linalg.matmul .. inits(%4 : ) // } // } // ``` @@ -551,8 +551,8 @@ // %2 = scf.for .. iter_args(%arg0 = %1 /* incorrect value */ ) { // %3 = scf.for .. iter_args(%arg1 = %arg0) { // %4 = tensor.extract_slice %0 /*incorrect value */ [..] - // %5 = linalg.fill .. outs(%4 : ) - // .. = linalg.matmul .. outs(%5 : ) + // %5 = linalg.fill .. inits(%4 : ) + // .. = linalg.matmul .. inits(%5 : ) // } // } // ``` @@ -572,8 +572,8 @@ // %1 = scf.for .. iter_args(%arg0 = %0 /* corrected value */ ) { // %2 = scf.for .. 
iter_args(%arg1 = %arg0) { // %3 = tensor.extract_slice %arg1 /* corrected value */ [..] - // %4 = linalg.fill .. outs(%3 : ) - // .. = linalg.matmul .. outs(%4 : ) + // %4 = linalg.fill .. inits(%3 : ) + // .. = linalg.matmul .. inits(%4 : ) // } // } // ``` diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -528,7 +528,7 @@ /// /// Is lowered to: /// ``` -/// linalg.map ins() outs(%dest) { +/// linalg.map ins() inits(%dest) { /// %d0 = linalg.index 0 : index /// %d1 = linalg.index 1 : index /// %0 = "some_op"(%d0, %d1) : (index, index) -> (index) diff --git a/mlir/python/mlir/dialects/linalg/__init__.py b/mlir/python/mlir/dialects/linalg/__init__.py --- a/mlir/python/mlir/dialects/linalg/__init__.py +++ b/mlir/python/mlir/dialects/linalg/__init__.py @@ -33,7 +33,7 @@ # `linalg.matmul(lhs, rhs, outs=[out])` creates the following IR: # ``` # %1 = linalg.matmul ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) -# outs(%0 : tensor<4x8xf32>) +# inits(%0 : tensor<4x8xf32>) # -> tensor<4x8xf32> # ``` # b. by setting emit_generic=True, the Generic Op Form is emitted, e.g.: @@ -41,7 +41,7 @@ # ``` # %1 = linalg.generic {indexing_maps = [...], iterator_types = [...]} # ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) -# outs(%0 : tensor<4x8xf32>) { +# inits(%0 : tensor<4x8xf32>) { # ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): # ... # linalg.yield %3 : f32 diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py @@ -41,7 +41,7 @@ "but none is set. 
Did you mean to call this in an op definition?") -def _prepare_structured_op_outs(outs: StructuredOpOuts) -> ValueList: +def _prepare_structured_op_inits(outs: StructuredOpOuts) -> ValueList: if isinstance(outs, (ir.Operation, ir.OpView)): return _get_op_results_or_values(outs) elif isinstance(outs, ir.OpResultList): @@ -87,7 +87,7 @@ emit_generic or not ctx.is_registered_operation(fully_qualified_name)) op_config = op_configs[0] - out_values = _prepare_structured_op_outs(outs) + out_values = _prepare_structured_op_inits(outs) in_values = [_get_op_result_or_value(i) for i in ins] if op_config.structured_op: if emit_generic: diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py @@ -105,7 +105,7 @@ raise ValueError(f"Expected indexing_maps to use no symbols after " f"replacement and compression but got {indexing_maps}") - outs, out_types = _infer_structured_outs(op_config, in_arg_defs, ins, + outs, out_types = _infer_structured_inits(op_config, in_arg_defs, ins, out_arg_defs, outs) result_types = [t for t in out_types if isa(RankedTensorType, t)] @@ -474,7 +474,7 @@ "Unsupported 'min_unsigned' operands: {lhs}, {rhs}") -def _infer_structured_outs( +def _infer_structured_inits( op_config: LinalgStructuredOpConfig, in_arg_defs: Sequence[OperandDefConfig], ins: Sequence[Value], out_arg_defs: Sequence[OperandDefConfig], diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir --- a/mlir/test/Analysis/test-match-reduction.mlir +++ b/mlir/test/Analysis/test-match-reduction.mlir @@ -12,7 +12,7 @@ affine_map<(d0) -> (0)>], iterator_types = ["reduction"]} ins(%in0t : tensor) - outs(%out0t : tensor<1xf32>) { + inits(%out0t : tensor<1xf32>) { ^bb0(%in0: f32, %out0: f32): %add = arith.addf %in0, %out0 : f32 linalg.yield %add : f32 @@ -49,7 +49,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %cmp = arith.cmpf ogt, %in0, %out0 : f32 %sel = arith.select %cmp, %in0, %out0 : f32 @@ -69,7 +69,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %in0 : f32 %sub = arith.subf %mul, %in0 : f32 diff --git a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir --- a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir +++ b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir @@ -7,7 +7,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func 
@generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -32,7 +32,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> // CHECK: return %[[PADDED]] : tensor<4x?x?x?xf32> // CHECK: } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -4,8 +4,8 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) inits([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) inits([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -17,10 +17,10 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) inits([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) inits([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = #tosa.matmul_quant} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) return %0 : tensor<1x5x6xi32> } @@ -33,8 +33,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[C0_0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor) -> tensor - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) inits(%[[INIT]] : tensor) -> tensor + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) 
inits(%[[FILLED]] : tensor) -> tensor %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) return %0 : tensor } @@ -47,8 +47,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) inits(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) return %0 : tensor<1x5x?xf32> } @@ -59,8 +59,8 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) inits(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -74,12 +74,12 @@ func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) inits([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) inits([[INITB]] : tensor<5x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -97,14 +97,14 @@ func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 
0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) inits([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) inits([[INITB]] // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi // CHECK: linalg.yield [[ADD]] : i32 @@ -123,12 +123,12 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[INITT:.+]] = tensor.empty(%[[DIM]]) // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}outs(%[[INITT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}inits(%[[INITT]] // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) inits(%[[FILL]] : tensor) -> tensor + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) inits(%[[INITB]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -143,9 +143,9 @@ func.func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x32x62xf32>) %0 = 
"tosa.max_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) return } @@ -157,9 +157,9 @@ // CHECK-DAG: tensor.yield [[CONST]] // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x33x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) return } @@ -170,9 +170,9 @@ // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}inits(%[[INIT]] // CHECK: %[[KERNEL:.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor<3x3xf32>) outs(%[[FILL]] : tensor) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor<3x3xf32>) inits(%[[FILL]] : tensor) %0 = "tosa.max_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor) -> (tensor) return } @@ -213,14 +213,14 @@ // Fill the pooling target: // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x5x33x62xf32> - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EMPTY]] : tensor<1x5x33x62xf32>) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EMPTY]] : tensor<1x5x33x62xf32>) // Compute the sum padding: // CHECK: %[[KERNEL:.+]] = tensor.empty() : tensor<4x4xf32> // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} // CHECK-SAME: ins(%[[PAD]], %[[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x5x33x62xf32>) + // CHECK-SAME: inits(%[[FILL]] : tensor<1x5x33x62xf32>) // Compute dimension based constants: // CHECK: %[[I1:.+]] = arith.constant 1 : index @@ -236,7 +236,7 @@ // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[POOL]] : tensor<1x5x33x62xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<1x5x33x62xf32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<1x5x33x62xf32>) // CHECK: ^bb0(%[[IN:.+]]: f32, %{{.+}}: f32) // CHECK: %[[ZERO:.+]] = arith.constant 0 @@ -297,7 +297,7 @@ // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[POOL:.+]] : tensor<1x5x33x62xi32>) - // CHECK-SAME: outs(%[[EMPTY:.+]] : tensor<1x5x33x62xi8>) + // CHECK-SAME: 
inits(%[[EMPTY:.+]] : tensor<1x5x33x62xi8>) // CHECK: ^bb0(%[[IN:.+]]: i32, %{{.+}}: i8) // Only different behavior is how the division is performed. @@ -344,12 +344,12 @@ // CHECK: tensor.yield %[[F0]] // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EMPTY]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EMPTY]] : tensor) // CHECK: %[[KERNEL:.+]] = tensor.empty() : tensor<4x4xf32> // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64> // CHECK-SAME: ins(%[[PADDED]], %[[KERNEL]] : tensor, tensor<4x4xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor) -> tensor + // CHECK-SAME: inits(%[[FILL]] : tensor) -> tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[BATCH]]) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic %0 = "tosa.avg_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor) -> (tensor) @@ -369,8 +369,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty() - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) inits(%[[B_IN]] : tensor<1x45x40x28xf32>) // CHECK: arith.addf // CHECK: linalg.yield %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = array, stride = array, dilation = array} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) @@ -392,8 +392,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) outs(%[[B_IN]] : tensor) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) inits(%[[B_IN]] : tensor) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = 
"tosa.conv2d"(%input, %weights, %bias) {pad = array, stride = array, dilation = array} : (tensor, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor) @@ -452,8 +452,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x?x?x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) inits(%[[B_IN]] : tensor<1x?x?x28xf32>) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = array, stride = array, dilation = array} : (tensor<1x?x?x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x?x?x28xf32>) @@ -493,11 +493,11 @@ func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -519,9 +519,9 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[OUT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = 
dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor) + // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) inits(%[[FILL]] : tensor) // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) outs(%[[OUT]] : tensor) { + // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) inits(%[[OUT]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -539,11 +539,11 @@ func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -565,13 +565,13 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = 
dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x12x12x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) inits([[OUT]] : tensor<1x12x12x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -589,13 +589,13 @@ func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x10x10x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) inits([[OUT]] : tensor<1x10x10x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -614,7 +614,7 @@ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index): // CHECK: tensor.yield %cst : f32 // CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32> - // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> + // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm 
{dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) inits(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]] %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = array, dilation = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>, tensor<15xf32>) -> tensor<2x?x?x15xf32> return @@ -628,16 +628,16 @@ // CHECK-DAG: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERMS]]) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 - // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : f32) outs(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) + // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : f32) inits(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[CONV3D:.+]] = linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} // CHECK-SAME: ins(%arg0, %[[TRANSPOSE]] : tensor<1x49x48x47x27xf32>, tensor<3x4x5x27x28xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xf32>) -> tensor<1x47x45x43x28xf32> + // CHECK-SAME: inits(%[[FILL]] : tensor<1x47x45x43x28xf32>) -> tensor<1x47x45x43x28xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%arg2, %[[CONV3D]] : tensor<28xf32>, tensor<1x47x45x43x28xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) { + // CHECK-SAME: inits(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) { // CHECK: ^bb0(%[[A1:.+]]: f32, %[[A2:.+]]: f32, %{{.+}}: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[A1]], %[[A2]] : f32 // CHECK: linalg.yield %[[ADD]] @@ -653,18 +653,18 @@ // CHECK-DAG: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERMS]]) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 - // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : i32) outs(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) + // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : i32) inits(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[IZP:.+]] = arith.constant -128 : i32 // CHECK-DAG: %[[FZP:.+]] = arith.constant 42 : i32 // CHECK-DAG: %[[CONV3D:.+]] = linalg.conv_3d_ndhwc_dhwcf_q // CHECK-SAME: {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} // CHECK-SAME: ins(%arg0, %[[TRANSPOSE]], %[[IZP]], %[[FZP]] : tensor<1x49x48x47x27xi8>, tensor<3x4x5x27x28xi8>, i32, i32) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xi32>) -> tensor<1x47x45x43x28xi32> + // CHECK-SAME: inits(%[[FILL]] : tensor<1x47x45x43x28xi32>) -> tensor<1x47x45x43x28xi32> // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%arg2, %[[CONV3D]] : tensor<28xi32>, tensor<1x47x45x43x28xi32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) { + // CHECK-SAME: inits(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) { // CHECK: ^bb0(%[[A1:.+]]: i32, %[[A2:.+]]: i32, %{{.+}}: i32): // CHECK: %[[ADD:.+]] = arith.addi %[[A1]], %[[A2]] : i32 // CHECK: linalg.yield %[[ADD]] diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir 
b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -34,7 +34,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x1x5x7xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) outs(%[[EMPTY]] : tensor<3x1x5x7xf32>) + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) inits(%[[EMPTY]] : tensor<3x1x5x7xf32>) // CHECK: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32): // CHECK: linalg.yield %[[IN]] : f32 %resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = array, offset = array, border = array} : (tensor<3x1x1x7xf32>) -> tensor<3x1x5x7xf32> @@ -52,7 +52,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x7xi32> // CHECK: %[[RESIZE:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) outs(%[[EMPTY]] : tensor<3x7xi32>) + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) inits(%[[EMPTY]] : tensor<3x7xi32>) // CHECK: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32 @@ -68,7 +68,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x4x5x7xi32> // CHECK: %[[BROADCAST:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi32>) outs(%[[EMPTY]] : tensor<3x4x5x7xi32>) { + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi32>) inits(%[[EMPTY]] : tensor<3x4x5x7xi32>) { // CHECK: ^bb0(%[[IN:.+]]: i32, %[[OUT:.+]]: i32): // CHECK: linalg.yield %[[IN]] : i32 %resize = "tosa.resize"(%arg0) {mode = "BILINEAR", scale = array, offset = array, border = array} : (tensor<3x1x1x7xi8>) -> tensor<3x4x5x7xi32> @@ -87,7 +87,7 @@ // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map] // CHECK-SAME: iterator_types = ["parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) outs(%[[EMPTY]] : tensor<3x7xi32>) { + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) inits(%[[EMPTY]] : tensor<3x7xi32>) { // CHECK: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -6,7 +6,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor) -> tensor { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor) outs([[INIT]] : tensor) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor) inits([[INIT]] : tensor) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -26,7 +26,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: 
[[INIT:%.+]] = tensor.empty() : tensor<2xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -45,7 +45,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -97,7 +97,7 @@ func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = "tosa.reshape"(%[[ARG0]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor, tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -117,7 +117,7 @@ func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = "tosa.reshape"(%[[ARG1]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -139,7 +139,7 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> // CHECK: [[RESHAPE1:%.+]] = "tosa.reshape"(%[[ARG0]]) {new_shape = array} // CHECK: [[RESHAPE2:%.+]] = "tosa.reshape"(%[[ARG1]]) {new_shape = array} - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", 
"parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -560,7 +560,7 @@ func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () { %0 = arith.constant dense<[1, 2, 0]> : tensor<3xi32> // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3x1xi32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) inits([[OUT:%.+]] : tensor<2x3x1xi32>) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -580,7 +580,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -602,7 +602,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -621,8 +621,8 @@ func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<4xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -631,8 +631,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] 
: tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<5xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -671,8 +671,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor // CHECK: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -691,8 +691,8 @@ func.func @reduce_float_dyn_rank_1(%arg0: tensor) -> () { // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -713,8 +713,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32> // CHECK: %[[CST1:.+]] = arith.constant 1.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) inits(%[[FILL]] : tensor<5x?xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -735,8 +735,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill 
ins(%[[CMIN]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[MAX:.+]] = arith.maxf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[MAX]] : f32 @@ -756,8 +756,8 @@ func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<4xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -766,8 +766,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<5xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -806,8 +806,8 @@ func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant true - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) inits([[FILL]] : tensor<4xi1>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1) // CHECK: [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1 // CHECK: linalg.yield [[RES]] : i1 @@ -905,7 +905,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -926,7 +926,7 @@ // CHECK: [[C0:%.+]] = 
arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xui8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -959,13 +959,13 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array, shift = array, scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array, shift = array, scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) return @@ -983,7 +983,7 @@ // CHECK: %[[C2:.+]] = arith.constant 2 // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) inits(%[[INIT]] : tensor<1x?x?x32xi8>) %0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = array, output_zp = 0 : i32, per_channel = false, scale32 = true, shift = array} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8> return } @@ -998,7 +998,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -1030,7 
+1030,7 @@ // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]> // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]> // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) inits([[INIT]] : tensor<3xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C243:%.+]] = arith.constant 243 // CHECK: [[C252:%.+]] = arith.constant 252 @@ -1083,7 +1083,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1096,7 +1096,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1120,7 +1120,7 @@ // CHECK: %[[C0_2:.+]] = arith.constant 0 // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} inits(%[[INIT]] : tensor) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[D0_2]], %[[SUB1]] @@ -1140,21 +1140,21 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8> func.func @tile(%arg0 : tensor<2x3xi8>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<2x2x1x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"([[GENERIC]]) {new_shape = array} %0 = "tosa.tile"(%arg0) {multiples = array} : (tensor<2x3xi8>) -> (tensor<4x3xi8>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: 
[[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<1x2x2x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"([[GENERIC]]) {new_shape = array} %1 = "tosa.tile"(%arg0) {multiples = array} : (tensor<2x3xi8>) -> (tensor<2x6xi8>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<5x2x7x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"([[GENERIC]]) {new_shape = array} @@ -1174,7 +1174,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor<2x?x1x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor<2x?x1x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"(%[[GENERIC]]) {new_shape = array} @@ -1194,7 +1194,7 @@ // CHECK: %[[CST1:.+]] = arith.constant 1 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8> // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits(%[[INIT]] : tensor<2x2x?x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"(%[[GENERIC]]) {new_shape = array} @@ -1214,11 +1214,11 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () { // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) + // 
CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 0 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1230,11 +1230,11 @@ // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 1 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1266,11 +1266,11 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: %[[IDX:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] @@ -1290,11 +1290,11 @@ func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () { // CHECK: %[[IDX_INIT:.+]] = tensor.empty() // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty() // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} 
ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: %[[IDX:.+]] = linalg.index 1 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] // CHECK: %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]] @@ -1312,7 +1312,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xf32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1332,7 +1332,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1350,7 +1350,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xi32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1368,7 +1368,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ -1386,7 +1386,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // CHECK: %[[INIT:.+]] = 
tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) inits(%[[INIT]] : tensor<6xi32>) // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32) // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]] // CHECK: %[[C32768:.+]] = arith.constant 32768 @@ -1421,7 +1421,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ -1439,7 +1439,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir @@ -10,7 +10,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"] @@ -37,7 +37,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir @@ -4,7 +4,7 @@ func.func @unknown_op_aliasing(%f: f32, %f2: f32, %pos: index) -> f32 { %0 = tensor.empty() : tensor<10xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<10xf32>) -> tensor<10xf32> + %1 = linalg.fill ins(%f : f32) inits(%0 : 
tensor<10xf32>) -> tensor<10xf32> // Something must bufferize out-of-place because the op may return an alias // of %1. @@ -12,7 +12,7 @@ %alias = "dummy.dummy_op"(%1) : (tensor<10xf32>) -> (tensor<10xf32>) // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%f2 : f32) outs(%1 : tensor<10xf32>) -> tensor<10xf32> + %2 = linalg.fill ins(%f2 : f32) inits(%1 : tensor<10xf32>) -> tensor<10xf32> %3 = tensor.extract %alias[%pos] : tensor<10xf32> return %3 : f32 } @@ -23,7 +23,7 @@ func.func @unknown_op_writing(%f: f32, %f2: f32, %pos: index) -> f32 { %0 = tensor.empty() : tensor<10xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<10xf32>) -> tensor<10xf32> + %1 = linalg.fill ins(%f : f32) inits(%0 : tensor<10xf32>) -> tensor<10xf32> // The op may bufferize to a memory write, so it must bufferize out-of-place. // CHECK: "dummy.dummy_op"(%{{.*}}) {__inplace_operands_attr__ = ["false"]} diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir @@ -11,7 +11,7 @@ %f0 = arith.constant 0.0: f32 // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[EXTRACT_SLICE_ALLOC]] : memref) // Alloc is needed for the **first** insert_slice (due to backward traversal during analysis). // CHECK: %[[DIM:.*]] = memref.dim %[[FUNC_ARG]] // This allocs the whole dim to allow for a full clone of t. @@ -20,7 +20,7 @@ // insert_slice. The pass replaces the tensor.empty with an out-of-place // extract_slice. %a = tensor.empty(%sz) : tensor - %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor + %f = linalg.fill ins(%f0 : f32) inits(%a : tensor) -> tensor // CHECK: memref.copy %[[FUNC_ARG]], %[[ALLOC]] : memref to memref // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref to memref> @@ -52,8 +52,8 @@ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW]] : memref) -> tensor // Self-copy canonicalizes away later. 
%r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor into tensor @@ -79,8 +79,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%iv][5][1] : tensor<5xf32> into tensor @@ -109,8 +109,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%idx][5][1] : tensor<5xf32> into tensor @@ -130,7 +130,7 @@ func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> { %cst = arith.constant 8.0 : f32 %0 = tensor.empty() : tensor<128xf32> - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) -> tensor<128xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<128xf32>) -> tensor<128xf32> %2 = tensor.expand_shape %1 [[0, 1, 2]] : tensor<128xf32> into tensor<1x1x128xf32> %3 = tensor.insert_slice %2 into %t[2, 3, 0][1, 1, 128][1, 1, 1] @@ -158,8 +158,8 @@ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW]] : memref) -> tensor // Self-copy canonicalizes away later. scf.forall.in_parallel { @@ -185,15 +185,15 @@ %if = scf.if %c -> tensor { // CHECK: %[[T_SUBVIEW_1:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a1 = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW_1]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW_1]] : memref) -> tensor // CHECK: scf.yield %[[T_SUBVIEW_1]] scf.yield %f1 : tensor } else { // CHECK: %[[T_SUBVIEW_2:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a2 = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW_2]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW_2]] : memref) -> tensor // CHECK: scf.yield %[[T_SUBVIEW_2]] scf.yield %f2 : tensor } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -140,10 +140,10 @@ // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. 
// CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32>
- // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]]
+ // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[m1]]
// CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]]
%t1 = bufferization.alloc_tensor() : tensor<10xf32>
- %filled = linalg.fill ins(%cst : f32) outs(%t1 : tensor<10xf32>) -> tensor<10xf32>
+ %filled = linalg.fill ins(%cst : f32) inits(%t1 : tensor<10xf32>) -> tensor<10xf32>
// The transfer_write is out-of-place because "dummy_op" may read.
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
@@ -51,13 +51,13 @@
// CHECK: %[[call:.*]] = call @return_slice(%[[t]]
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: memref.copy %[[call]], %[[alloc]]
-// CHECK: linalg.fill ins({{.*}}) outs(%[[t]]
+// CHECK: linalg.fill ins({{.*}}) inits(%[[t]]
// CHECK: memref.load %[[alloc]]
// CHECK: memref.load %[[t]]
func.func @main(%t: tensor, %sz: index, %idx: index) -> (f32, f32) {
%cst = arith.constant 1.0 : f32
%0 = call @return_slice(%t, %sz) : (tensor, index) -> (tensor)
- %filled = linalg.fill ins(%cst : f32) outs(%t : tensor) -> tensor
+ %filled = linalg.fill ins(%cst : f32) inits(%t : tensor) -> tensor
%r1 = tensor.extract %0[%idx] : tensor
%r2 = tensor.extract %filled[%idx] : tensor
return %r1, %r2 : f32, f32
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -75,21 +75,21 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// matmul output operand interferes with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// matmul output operand does not interfere with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// CHECK: return
@@ -260,7 +260,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
@@ -292,7 +292,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
@@ -304,7 +304,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor) -> tensor
+ %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor) -> tensor
%3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32>
@@ -336,14 +336,14 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// matmul output operand is inplaceable at the function boundary.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%C: tensor<4x4xf32>)
+ inits(%C: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// CHECK: return
@@ -370,7 +370,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%B, %C: tensor, tensor)
- outs(%sB: tensor<4x4xf32>)
+ inits(%sB: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// Step 2. %sC forward propagates to an inplace write in %E.
@@ -385,7 +385,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%sC: tensor<4x4xf32>)
+ inits(%sC: tensor<4x4xf32>)
  -> tensor<4x4xf32>
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
@@ -410,7 +410,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor
+ %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) inits(%15 : tensor) -> tensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
@@ -450,7 +450,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%D = linalg.matmul ins(%B, %C: tensor, tensor)
- outs(%sB: tensor<4x4xf32>)
+ inits(%sB: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// Step 2. %sC forward propagates to an inplace write in %E.
@@ -465,7 +465,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%sC: tensor<4x4xf32>)
+ inits(%sC: tensor<4x4xf32>)
  -> tensor<4x4xf32>
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
@@ -504,7 +504,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
- %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -527,7 +527,7 @@
%sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32>
%sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
- %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FB = linalg.fill ins(%f0 : f32) inits(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
%rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
%rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor
%rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -550,7 +550,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor
- %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor) -> tensor
+ %FC = linalg.fill ins(%f0 : f32) inits(%ssC : tensor) -> tensor
%rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor into tensor
%rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -577,12 +577,12 @@
// cannot bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
- %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
// 1. Bufferizes inplace: no alias to %A is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
call @foo(%A) : (tensor<64xf32>) -> ()
call @foo(%B) : (tensor<64xf32>) -> ()
@@ -613,12 +613,12 @@
// bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
- %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
// 4. Bufferizes inplace: no alias to %A is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
// 3. Does not read or write, bufferizes inplace.
// CHECK: scf.for
@@ -638,12 +638,12 @@
// cannot bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
- %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
+ %A2 = linalg.fill ins(%f1 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32>
// 1. Bufferizes inplace: no alias to %A2 is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
+ %B2 = linalg.fill ins(%f2 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32>
call @bar(%A2) : (tensor<64xf32>) -> ()
call @bar(%B2) : (tensor<64xf32>) -> ()
@@ -688,8 +688,8 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
- %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -701,7 +701,7 @@
%sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
%r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
- outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2]
@@ -726,7 +726,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
- %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
@@ -734,7 +734,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
- %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
@@ -748,7 +748,7 @@
%sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
%r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
- outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2]
@@ -779,7 +779,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>
+ %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
@@ -211,7 +211,7 @@
func.func @mini_test_case1() -> tensor<10x20xf32> {
%f0 = arith.constant 0.0 : f32
%t = bufferization.alloc_tensor() : tensor<10x20xf32>
- %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32>
+ %r = linalg.fill ins(%f0 : f32) inits(%t : tensor<10x20xf32>) -> tensor<10x20xf32>
// expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
return %r : tensor<10x20xf32>
}
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -244,7 +244,7 @@
// CHECK-NOT: copy
func.func @does_not_read(%t: tensor) -> tensor {
%f0 = arith.constant 0.0 : f32
- %r = linalg.fill ins(%f0 : f32) outs(%t : tensor) -> tensor
+ %r = linalg.fill ins(%f0 : f32) inits(%t : tensor) -> tensor
return %r : tensor
}
@@ -416,12 +416,12 @@
// CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
%v0 = arith.constant 0.0 : f32
- // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref>)
- %d = linalg.fill ins(%v0 : f32) outs(%c : tensor) -> tensor
+ // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref>)
+ %d = linalg.fill ins(%v0 : f32) inits(%c : tensor) -> tensor
- // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref>)
+ // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) inits(%[[C]] : memref>)
%e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>)
- outs(%d: tensor) -> tensor
+ inits(%d: tensor) -> tensor
// CHECK-NEXT: return
return %e : tensor
@@ -446,12 +446,12 @@
%B = bufferization.alloc_tensor() : tensor<64xf32>
%C = bufferization.alloc_tensor() : tensor
- // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
- // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
- // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref)
- %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
- %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
- %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor
+ // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) inits(%[[A]] : memref<64xf32>)
+ // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) inits(%[[B]] : memref<64xf32>)
+ // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref)
+ %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32>
+ %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32>
+ %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor
// CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
%res = call @init_and_dot(%AA, %BB, %CC) :
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -56,11 +56,11 @@
-> (tensor<5xf32>, tensor<5xf32>) {
// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<5xf32>
- // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
+ // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<5xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- outs(%t : tensor<5xf32>) {
+ inits(%t : tensor<5xf32>) {
^bb0(%arg0 : f32) :
linalg.yield %f : f32
} -> tensor<5xf32>
@@ -75,11 +75,11 @@
{
%0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<3xf32>
- // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
+ // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<3xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- outs(%0 : tensor<3xf32>) {
+ inits(%0 : tensor<3xf32>) {
^bb0(%arg0 : f32) :
linalg.yield %f : f32
} -> tensor<3xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
@@ -113,8 +113,8 @@
// CHECK-SAME: %[[B:.*]]: memref<9x6xf32>,
// CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> {
func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> {
- // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>)
- %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32>
+ // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) inits(%[[C]] : memref<12x6xf32>)
+ %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) inits(%C: tensor<12x6xf32>) -> tensor<12x6xf32>
// CHECK: return %[[C]] : memref<12x6xf32>
return %D : tensor<12x6xf32>
}
diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
--- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
@@ -126,7 +126,7 @@
%name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one) {
- %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) -> tensor<32x32xf32>
+ %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) inits(%z : tensor<32x32xf32>) -> tensor<32x32xf32>
gpu.terminator
}
return
@@ -288,7 +288,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%x : memref<32x32xf32>)
- outs(%y : memref<32x32xf32>) {
+ inits(%y : memref<32x32xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
}
diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir
--- a/mlir/test/Dialect/LLVM/transform-e2e.mlir
+++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir
@@ -7,7 +7,7 @@
// CHECK-NOT: linalg
// CHECK: llvm.intr.fmuladd{{.*}}
%0 = linalg.matmul ins(%arg0, %arg1:
tensor<2x4xf32>, tensor<4x6xf32>)
- outs(%arg2: tensor<2x6xf32>)
+ inits(%arg2: tensor<2x6xf32>)
  -> tensor<2x6xf32>
return %0 : tensor<2x6xf32>
}
diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir
--- a/mlir/test/Dialect/Linalg/affine.mlir
+++ b/mlir/test/Dialect/Linalg/affine.mlir
@@ -10,7 +10,7 @@
%B = memref.view %arg0[%c0][%K, %N] : memref to memref
%C = memref.view %arg0[%c0][%M, %N] : memref to memref
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return
}
@@ -19,7 +19,7 @@
//----------------------------------------------------------------------------//
func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) {
linalg.batch_matmul ins(%A, %B: memref, memref)
- outs(%C : memref)
+ inits(%C : memref)
return
}
// CHECK-LABEL: @named_batch_matmul
diff --git a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
--- a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
+++ b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
@@ -7,7 +7,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -22,7 +22,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg3] [%arg5] [1] : tensor to tensor
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[%arg2, %arg3] [%arg4, %arg5] [1, 1] : tensor to tensor
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) outs(%[[SLICE2]] : tensor)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) inits(%[[SLICE2]] : tensor)
// CHECK: return %[[GENERIC]] : tensor
//-----
@@ -34,7 +34,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor<16x8xf32>, tensor<8xf32>)
- outs(%arg0 : tensor<16x8xf32>) {
+ inits(%arg0 : tensor<16x8xf32>) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -49,7 +49,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor<8xf32> to tensor<2xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor<16x8xf32> to tensor<4x2xf32>
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) inits(%[[SLICE2]] : tensor<4x2xf32>)
// CHECK: return %[[GENERIC]] : tensor<4x2xf32>
//-----
@@ -61,7 +61,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor, tensor<8xf32>)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -76,7 +76,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg2] [2] [1] : tensor<8xf32> to tensor<2xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, %arg2] [%arg3, 2] [1, 1] : tensor to tensor
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) outs(%[[SLICE2]] : tensor)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) inits(%[[SLICE2]] : tensor)
// CHECK: return %[[GENERIC]] : tensor
//-----
@@ -88,7 +88,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -103,7 +103,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor to tensor<2xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor to tensor<4x2xf32>
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) inits(%[[SLICE2]] : tensor<4x2xf32>)
// CHECK: return %[[GENERIC]] : tensor<4x2xf32>
//-----
@@ -112,7 +112,7 @@
%lhs = arith.constant dense<1.0> : tensor<4x4xf32>
%rhs = arith.constant dense<1.0> : tensor<4x4xf32>
%dst = arith.constant dense<[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]]> : tensor<4x4xf32>
- %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) outs(%dst : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) inits(%dst : tensor<4x4xf32>) -> tensor<4x4xf32>
%1 = tensor.extract_slice %0[1,1][2,2][1,1] : tensor<4x4xf32> to tensor<2x2xf32>
return %1 : tensor<2x2xf32>
}
@@ -121,7 +121,7 @@
// CHECK: %[[SLICE0:.+]] = arith.constant dense<1.000000e+00> : tensor<2x4xf32>
// CHECK: %[[SLICE1:.+]] = arith.constant dense<1.000000e+00> : tensor<4x2xf32>
// CHECK: %[[SLICE3:.+]] = tensor.extract_slice %[[CST:.+]][1, 1] [2, 2] [1, 1] : tensor<4x4xf32> to tensor<2x2xf32>
-// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) outs(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32>
+// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) inits(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32>
// CHECK: return %[[MATMUL]] : tensor<2x2xf32>
//-----
@@ -136,12 +136,12 @@
%cst = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<1x112x112x32xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
- outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%slice = tensor.extract_slice %conv [0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1]
: tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32>
@@ -153,8 +153,8 @@
// CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32>
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32>
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) outs(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
-// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) outs(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) inits(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
+// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) inits(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
// CHECK: return %[[CONV]] : tensor<1x32x32x16xf32>
//-----
@@ -163,7 +163,7 @@
func.func @rank_reducing_slice(%width : index) -> tensor<1x1x1x?xf32> {
%cst = arith.constant 1.000000e+00 : f32
%init = tensor.empty(%width) : tensor<1x?xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32>
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x?xf32>) -> tensor<1x?xf32>
%slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor
%expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor into tensor<1x1x1x?xf32>
return %expand : tensor<1x1x1x?xf32>
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -16,7 +16,7 @@
// CHECK-DAG: %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]}
// CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>)
-// CHECK-SAME: outs(%[[RESULT_MEMREF]] : memref<4xf32>) {
+// CHECK-SAME: inits(%[[RESULT_MEMREF]] : memref<4xf32>) {
// CHECK: ^bb0(%[[RESULT1:.*]]: f32, %[[UNUSED:.*]]: f32):
// CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32
// CHECK: linalg.yield %[[DIM1]] : f32
@@ -28,7 +28,7 @@
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]
} ins(%arg0 : tensor<4xf32>)
- outs(%arg0 : tensor<4xf32>) {
+ inits(%arg0 : tensor<4xf32>) {
^bb0(%gen_arg1: f32, %out: f32):
%tmp1 = math.exp %gen_arg1 : f32
linalg.yield %tmp1 : f32
@@ -50,14 +50,14 @@
// CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF]] : memref)
-// CHECK-SAME: outs(%[[OUT_BUF]] : memref) {
+// CHECK-SAME: inits(%[[OUT_BUF]] : memref) {
func.func @empty_tensor(%in : tensor, %size: index) -> tensor {
%init = tensor.empty(%size) : tensor
%0 = linalg.generic {
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]
} ins(%in : tensor)
- outs(%init : tensor) {
+ inits(%init : tensor) {
^bb0(%gen_arg1: f32, %out: f32):
%tmp1 = math.exp %gen_arg1 : f32
linalg.yield %tmp1 : f32
@@ -75,7 +75,7 @@
// CHECK: %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}} : memref<4xf32>)
-// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
+// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
%0, %1 = linalg.generic {
@@ -108,7 +108,7 @@
// CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF_ARG]] : memref)
-// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref)
+// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref, memref)
func.func @dynamic_results(%arg0: tensor) -> (tensor, tensor) {
%0, %1 = linalg.generic {
@@ -146,13 +146,13 @@
// CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>)
-// CHECK-SAME: outs(%[[INIT_BUFFER]] : memref<3x2xf32>) {
+// CHECK-SAME: inits(%[[INIT_BUFFER]] : memref<3x2xf32>) {
func.func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {
%0 = linalg.generic #trait
ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
- outs(%arg1 : tensor<3x2xf32>) {
+ inits(%arg1 : tensor<3x2xf32>) {
^bb(%v0: vector<3x4xi4>, %v1: f32) :
linalg.yield %v1 : f32
} -> tensor<3x2xf32>
@@ -167,10 +167,10 @@
func.func @bufferize_fill(%arg0: tensor) -> tensor {
%c0 = arith.constant 0.0 : f32
// CHECK: %[[ALLOC:.*]] = memref.alloc
- // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : memref)
+ // CHECK: linalg.fill ins(%cst : f32) inits(%[[ALLOC]] : memref)
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref
// CHECK: return %[[TENSOR]]
- %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor) -> tensor
+ %0 = linalg.fill ins(%c0 : f32) inits(%arg0 : tensor) -> tensor
return %0 : tensor
}
@@ -179,13 +13,13 @@
// CHECK-LABEL: func @bufferize_dot
func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor {
%dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>)
- outs(%out : tensor) -> tensor
+ inits(%out : tensor) -> tensor
return %dot : tensor
// CHECK: %[[ALLOC:.*]] = memref.alloc
// TODO: The copy is not necessary.
// CHECK: memref.copy {{.*}}, %[[ALLOC]]
// CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
- // CHECK-SAME: outs(%[[ALLOC:.*]] : memref)
+ // CHECK-SAME: inits(%[[ALLOC:.*]] : memref)
// CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref
// CHECK: return %[[OUT_TENSOR]]
}
@@ -202,14 +202,14 @@
// CHECK: %[[collapse:.*]] = tensor.collapse_shape %[[arg0]]
// CHECK: %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]]
// CHECK: %[[alloc:.*]] = memref.alloc()
-// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>)
+// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) inits(%[[alloc]] : memref<6xi64>)
// CHECK: %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]]
// CHECK: %[[call:.*]] = call @csum(%[[generic_t]])
// CHECK: return %[[call]]
func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> {
%0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1>
%1 = tensor.empty() : tensor<6xi64>
- %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) {
+ %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) inits(%1 : tensor<6xi64>) {
^bb0(%arg1: i1, %arg2: i64):
%4 = arith.extui %arg1 : i1 to i64
linalg.yield %4 : i64
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -10,9 +10,9 @@
%2 = memref.view %1[%c0][] : memref to memref<16x16xf32>
%3 = memref.cast %2 : memref<16x16xf32> to memref
- // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>)
+ // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) inits({{.*}}memref<16x16xf32>)
linalg.matmul ins(%3, %3: memref, memref)
- outs(%3: memref)
+ inits(%3: memref)
return %3: memref
}
@@ -32,7 +32,7 @@
memref.copy %arg0, %arg0 : memref<0xf32> to memref<0xf32>
// tensor<0xf32> cannot be dce'ed
- %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) {
+ %1 = linalg.generic #trait inits(%arg1 : tensor<0xf32>) {
^bb(%0: f32) :
linalg.yield %0 : f32
} -> tensor<0xf32>
@@ -56,9 +56,9 @@
%tc = tensor.cast %c : tensor<3x?xf32> to tensor
// CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>)
- // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
+ // CHECK-SAME: inits({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
%0 = linalg.matmul ins(%ta, %tb: tensor, tensor)
- outs(%tc: tensor) -> tensor
+ inits(%tc: tensor) -> tensor
%1 = tensor.cast %0 : tensor to tensor<3x?xf32>
@@ -79,9 +79,9 @@
%tc = tensor.cast %c : tensor<*xf32> to tensor
// CHECK: linalg.matmul ins({{.*}}tensor, tensor)
- // CHECK-SAME: outs({{.*}}tensor) -> tensor
+ // CHECK-SAME: inits({{.*}}tensor) -> tensor
%0 = linalg.matmul ins(%ta, %tb: tensor, tensor)
- outs(%tc: tensor) -> tensor
+ inits(%tc: tensor) -> tensor
// CHECK: tensor.cast
%1 = tensor.cast %0 : tensor to tensor<*xf32>
@@ -98,11 +98,11 @@
func.func @linalg_effects(%a : tensor, %b : memref, %c : tensor) {
// CHECK-NOT: %{{.*}} = linalg.matmul
%t = linalg.matmul ins(%a, %b : tensor, memref)
- outs(%c : tensor) -> tensor
+ inits(%c : tensor) -> tensor
// CHECK: linalg.matmul
linalg.matmul ins(%a, %c : tensor, tensor)
- outs(%b : memref)
+ inits(%b : memref)
return
}
@@ -122,7 +122,7 @@
indexing_maps = [#map, #map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor, tensor)
- outs(%3, %3 : tensor, tensor) {
+ inits(%3, %3 : tensor, tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg3, %arg2 : f32, f32
} -> (tensor, tensor)
@@ -143,7 +143,7 @@
indexing_maps = [#map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0 : tensor)
- outs(%out : tensor<1x2x3xf32>) {
+ inits(%out : tensor<1x2x3xf32>) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> (tensor<1x2x3xf32>)
@@ -163,7 +163,7 @@
indexing_maps = [#map, #map],
iterator_types = []
} ins(%arg0 : f32)
- outs(%out : tensor) {
+ inits(%out : tensor) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> (tensor)
@@ -187,7 +187,7 @@
^bb1(%arg1 : f32):
%3 = linalg.generic {indexing_maps = [#map, #map],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%2 : tensor) {
+ ins(%arg0 : tensor) inits(%2 : tensor) {
^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg1 : f32
} -> tensor
@@ -215,7 +215,7 @@
{indexing_maps = [#map, #map, #map, #map],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%2, %2 : tensor, tensor) {
+ inits(%2, %2 : tensor, tensor) {
^bb0(%arg3: f32, %arg4 : f32, %arg5 : f32, %arg6 : f32):
linalg.yield %arg2, %arg4 : f32, f32
} -> (tensor, tensor)
@@ -247,10 +247,10 @@
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32>
+ %0 = linalg.fill ins(%c0_i32 : i32) inits(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32>
%1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>)
- outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
- %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) {
+ inits(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
+ %2 = linalg.generic #trait inits(%arg0 : tensor<7x7xi32>) {
^bb(%3: i32) :
linalg.yield %3 : i32
} -> tensor<7x7xi32>
@@ -270,7 +270,7 @@
%c21 = arith.constant 21 : index
%c42 = arith.constant 42 : index
%0 = tensor.empty(%c21, %c42) : tensor
- %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg1 : f32) inits(%0 : tensor) -> tensor
%2 = tensor.dim %arg0, %c0 : tensor
%3 = tensor.dim %arg0, %c1 : tensor
%4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor
@@ -278,7 +278,7 @@
}
// CHECK-LABEL: func @propogate_casts
// CHECK: %[[INIT:.+]] = tensor.empty
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]]
// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
// CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
// CHECK: return %[[RESULT]]
@@ -302,8 +302,8 @@
%empty = tensor.empty() : tensor<1x2x3x4xf32>
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32)
- // CHECK-SAME: outs(%[[COLLAPSE]] : tensor<6x4xf32>)
- %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
+ // CHECK-SAME: inits(%[[COLLAPSE]] : tensor<6x4xf32>)
+ %fill = linalg.fill ins(%zero : f32) inits(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
%reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]]
: tensor<1x2x3x4xf32> into tensor<6x4xf32>
// CHECK: return %[[FILL]] : tensor<6x4xf32>
@@ -317,8 +317,8 @@
func.func @fold_fill_reshape_dynamic(%arg0 : tensor) -> tensor {
%zero = arith.constant 0.0 : f32
// CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]]
- %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor) -> tensor
- // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[RESHAPE]]
+ %0 = linalg.fill ins(%zero : f32) inits(%arg0 : tensor) -> tensor
+ // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[RESHAPE]]
%1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]]
: tensor into tensor
// CHECK: return %[[RESULT]]
@@ -334,7 +334,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%0 : memref<4x16xf32>)
- outs(%0 : memref<4x16xf32>) {
+ inits(%0 : memref<4x16xf32>) {
^bb0(%arg4: f32, %arg5: f32):
linalg.yield %arg4 : f32
}
@@ -346,12 +346,12 @@
// CHECK-LABEL: func @fold_static_pad_fill
// CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32>
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]]
// CHECK: return %[[FILL]]
func.func @fold_static_pad_fill() -> tensor<412x276xf32> {
%f0 = arith.constant 0.0 : f32
%empty = tensor.empty() : tensor<400x273xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
%pad = tensor.pad %fill low[4, 1] high[8, 2] {
^bb0(%arg1: index, %arg2: index):
tensor.yield %f0 : f32
@@ -371,18 +371,18 @@
// CHECK-DAG: %[[I1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[SRC]] : tensor<8x?x16x32xf32>)
+// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[SRC]] : tensor<8x?x16x32xf32>)
// CHECK: %[[S0:.+]] = affine.apply #[[MAP0]]()[%[[LOW0]]]
// CHECK: %[[DIM1:.+]] = tensor.dim %[[OF]], %[[I1]] : tensor<8x?x16x32xf32>
// CHECK: %[[S1:.+]] = affine.apply #[[MAP1]]()[%[[DIM1]]]
// CHECK: %[[S2:.+]] = affine.apply #[[MAP2]]()[%[[HIGH2]]]
// CHECK: %[[S3:.+]] = affine.apply #[[MAP3]]()[%[[LOW3]], %[[HIGH3]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]]
// CHECK: return %[[FILL]]
func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor {
%f0 = arith.constant 0.0 : f32
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
%pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %f0 : f32
@@ -397,7 +397,7 @@
%f0 = arith.constant 0.0 : f32
%f1 = arith.constant 1.0 : f32
%empty = tensor.empty() : tensor<400x273xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
// CHECK: tensor.pad
%pad = tensor.pad %fill low[4, 1] high[8, 2] {
^bb0(%arg1: index, %arg2: index):
@@ -427,7 +427,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor)
- outs(%3 : tensor) {
+ inits(%3 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
@@ -437,7 +437,7 @@
// CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -458,7 +458,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>)
- outs(%3 : tensor) {
+ inits(%3 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
@@ -468,7 +468,7 @@
// CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -490,7 +490,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %5 : tensor, tensor<2x?x?xf32>)
- outs(%4 : tensor<2x3x4xf32>) {
+ inits(%4 : tensor<2x3x4xf32>) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
%9 = arith.addf %arg3, %arg4 : f32
linalg.yield %9 : f32
@@ -500,7 +500,7 @@
// CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -524,7 +524,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>)
- outs(%3 : tensor) {
+ inits(%3 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
@@ -533,7 +533,7 @@
return %7: tensor<2x3x4xf32>
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -548,7 +548,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor, tensor<1x?x?xf32>)
- outs(%0 : tensor) {
+ inits(%0 : tensor) {
^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%3 = arith.subf %arg5, %arg6 : f32
linalg.yield %3 : f32
@@ -556,7 +556,7 @@
return %2 : tensor
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
-// CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>)
+// CHECK-SAME: inits(%{{.*}} : tensor<1x?x?xf32>)
// CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor
}
@@ -570,7 +570,7 @@
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[INIT:.+]] = tensor.empty()
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]]
// CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP]]()[%[[LOW1]]]
// CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor
// CHECK: %[[D1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor
@@ -584,7 +584,7 @@
tensor.yield %f0 : f32
} : tensor to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
return %0: tensor<8x384x384xf32>
}
@@ -605,7 +605,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -624,7 +624,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
// Range overlap with %1 at dim#3
@@ -644,7 +644,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
// Range overlap with %0 at dim#3
@@ -664,7 +664,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
// Overlap between %0 and %1 is fine, but overlap with %2 is not.
// CHECK-COUNT-3: tensor.insert_slice
%0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -687,7 +687,7 @@
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
// Different filling value than padding value.
- %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f1 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -699,7 +699,7 @@
func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor, %arg1 : tensor,
%arg2 : tensor) -> (tensor<4x8xf32>, tensor) {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
%1 = tensor.cast %0 : tensor to tensor<4x8xf32>
return %1, %0 : tensor<4x8xf32>, tensor
}
@@ -712,7 +712,7 @@
// CHECK-DAG: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8xf32>
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[LHS_CAST]], %[[RHS_CAST]] :
-// CHECK-SAME: outs(%[[OUT_CAST]] :
+// CHECK-SAME: inits(%[[OUT_CAST]] :
// CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[MATMUL]]
// CHECK: return %[[MATMUL]], %[[RESULT_CAST]]
@@ -723,7 +723,7 @@
func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor, %arg1 : tensor,
%arg2 : tensor, %arg3 : i1) -> tensor {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
scf.if %arg3 {
%1 = tensor.cast %0 : tensor to tensor<4x8xf32>
func.call @some_use(%1) : (tensor<4x8xf32>) -> ()
@@ -735,7 +735,7 @@
// CHECK-LABEL: func @linalgop_with_cond_cast_consumer
// CHECK-SAME: (%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor, %[[ARG3:.*]]: i1)
// CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ARG2]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[ARG2]] : tensor) -> tensor
// CHECK: scf.if %[[ARG3]] {
// CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor to tensor<4x8xf32>
// CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> ()
@@ -748,7 +748,7 @@
%arg1 : tensor, %arg2 : tensor) -> (tensor<4x8x12x16xf32>, tensor) {
%0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
%1 = tensor.cast %0 : tensor to tensor<4x8x12x16xf32>
return %1, %0 : tensor<4x8x12x16xf32>, tensor
}
@@ -760,7 +760,7 @@
// CHECK: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8x12x16xf32>
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nchw_fchw
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
-// CHECK-SAME: outs(%[[OUT_CAST]] :
+// CHECK-SAME: inits(%[[OUT_CAST]] :
// CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]]
// CHECK: return %[[CONV]], %[[RESULT_CAST]]
@@ -780,7 +780,7 @@
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d1, d2, d0)>,
affine_map<(d0, d1, d2) -> (d2, d1, d0)>]}
- ins(%arg0 : tensor) outs(%empty1, %empty2 : tensor, tensor) {
+ ins(%arg0 : tensor) inits(%empty1, %empty2 : tensor, tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32) :
linalg.yield %b0, %b0 : f32, f32
} -> (tensor, tensor)
@@ -794,7 +794,7 @@
// CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: ins(%[[CAST]] :
-// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] :
+// CHECK-SAME: inits(%[[INIT2]], %[[INIT1]] :
// CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor
// CHECK: return %[[RETURN_CAST]], %[[GENERIC]]#1
@@ -806,7 +806,7 @@
indexing_maps = [#map, #map],
iterator_types = ["parallel"]
} ins(%arg0 : tensor)
- outs(%arg1 : memref) {
+ inits(%arg1 : memref) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
}
@@ -821,7 +821,7 @@
// CHECK-SAME: indexing_maps = [#map, #map],
// CHECK-SAME: iterator_types = ["parallel"]
// CHECK-SAME: } ins(%[[ARG1]] : tensor)
-// CHECK-SAME: outs(%[[ARG2]] : memref) {
+// CHECK-SAME: inits(%[[ARG2]] : memref) {
// -----
@@ -834,13 +834,13 @@
affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%1, %1 : memref<4xf32>, memref<4xf32>)
- outs(%0 : tensor<4xf32>) {
+ inits(%0 : tensor<4xf32>) {
^bb0(%in: f32, %in_24: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<4xf32>
%53 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- outs(%36 : tensor<4xf32>) {
+ inits(%36 : tensor<4xf32>) {
^bb0(%out: f32):
linalg.yield %out : f32
} -> tensor<4xf32>
@@ -856,7 +856,7 @@
indexing_maps = [#map, #map],
iterator_types = ["parallel"]
} ins(%0 : tensor)
- outs(%arg1 : memref) {
+ inits(%arg1 : memref) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
}
@@ -870,4 +870,4 @@
// CHECK-SAME: indexing_maps = [#map, #map],
// CHECK-SAME: iterator_types = ["parallel"]
// CHECK-SAME: } ins(%[[ARG1]] : tensor<5xf32>)
-// CHECK-SAME: outs(%[[ARG2]] : memref) {
+// CHECK-SAME: inits(%[[ARG2]] : memref) {
diff --git a/mlir/test/Dialect/Linalg/collapse-dim.mlir b/mlir/test/Dialect/Linalg/collapse-dim.mlir
--- a/mlir/test/Dialect/Linalg/collapse-dim.mlir
+++ b/mlir/test/Dialect/Linalg/collapse-dim.mlir
@@ -7,7 +7,7 @@
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
- ins(%arg0 : tensor<2x32x10x4096xf32>) outs(%arg1 : tensor<2x32xf32>) {
+ ins(%arg0 : tensor<2x32x10x4096xf32>) inits(%arg1 : tensor<2x32xf32>) {
^bb0(%arg3: f32, %arg4: f32):
%1 = arith.addf %arg3, %arg4 : f32
linalg.yield %1 : f32
@@ -22,7 +22,7 @@
// CHECK: %[[T:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32>
// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) outs(%{{.*}} : tensor<2x32xf32>) {
+// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) inits(%{{.*}} : tensor<2x32xf32>) {
// CHECK: } -> tensor<2x32xf32>
// -----
@@ -34,7 +34,7 @@
affine_map<(d0, d1, d2, d3) -> (d1, d0, d2, d3)>,
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
- ins(%arg0 : tensor<32x2x10x4096xf32>) outs(%arg1 : tensor<2x32x10x4096xf32>) {
+ ins(%arg0 : tensor<32x2x10x4096xf32>) inits(%arg1 : tensor<2x32x10x4096xf32>) {
^bb0(%arg3: f32, %arg4: f32):
%1 = arith.addf %arg3, %arg4 : f32
linalg.yield %1 : f32
@@ -50,6 +50,6 @@
// CHECK-DAG: %[[D:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32>
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) outs(%[[D]] : tensor<2x32x40960xf32>) {
+// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) inits(%[[D]] : tensor<2x32x40960xf32>) {
// CHECK: } -> tensor<2x32x40960xf32>
// CHECK: tensor.expand_shape %[[R]] {{\[}}[0], [1], [2, 3]] : tensor<2x32x40960xf32> into tensor<2x32x10x4096xf32>
diff --git a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir
--- a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir
+++ b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir
@@ -18,7 +18,7 @@
%0 = test.linalg_conv_op {
indexing_maps = [#map, #map],
iterator_types = [#test.iterator_type]}
- ins(%arg0 : tensor) outs(%arg1 : tensor) {
+ ins(%arg0 : tensor) inits(%arg1 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg3 : f32
} -> tensor
@@ -37,7 +37,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -56,7 +56,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -75,7 +75,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -94,7 +94,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -115,7 +115,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -136,7 +136,7 @@
#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -157,7 +157,7 @@
#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -178,7 +178,7 @@
#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -198,7 +198,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
--- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
+++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
@@ -11,7 +11,7 @@
%0 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%arg0, %arg1: tensor, tensor<3x3x4x16xf32>)
- outs(%arg2: tensor) -> tensor
+ inits(%arg2: tensor) -> tensor
return %0 : tensor
}
@@ -57,7 +57,7 @@
// CHECK-SAME: tensor<3x3x4x16xf32> into tensor<36x16xf32>
// CHECK-DAG: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]]
// CHECK-SAME: [0, 1, 2], [3]
-// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INIT_COL_TENSOR]], %[[RESHAPED_FILTER]] : tensor<196x36xf32>, tensor<36x16xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<196x16xf32>)
+// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INIT_COL_TENSOR]], %[[RESHAPED_FILTER]] : tensor<196x36xf32>, tensor<36x16xf32>) inits(%[[RESHAPED_OUTPUT]] : tensor<196x16xf32>)
// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1, 2], [3]] : tensor<196x16xf32> into tensor<1x14x14x16xf32>
// CHECK: return %[[RESULT]]
@@ -65,7 +65,7 @@
%0 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%arg0, %arg1: tensor<1x16x16x4xf32>, tensor<3x3x4x16xf32>)
- outs(%arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32>
+ inits(%arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32>
return %0 : tensor<1x14x14x16xf32>
}
@@ -94,7 +94,7 @@
// CHECK: %[[INPUT_T:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[INPUT]] : tensor<1x114x114x16xf32>) outs(%[[INPUT_T_INIT]] : tensor<1x16x114x114xf32>) {
+// CHECK-SAME: ins(%[[INPUT]] : tensor<1x114x114x16xf32>) inits(%[[INPUT_T_INIT]] : tensor<1x16x114x114xf32>) {
// CHECK-NEXT: ^bb0(%[[ARG3:.+]]: f32, %[[ARG4:.+]]: f32):
// CHECK-NEXT: linalg.yield %[[ARG3]] : f32
// CHECK-NEXT: } -> tensor<1x16x114x114xf32>
@@ -102,7 +102,7 @@
// CHECK: %[[FILTER_T:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[FILTER]] : tensor<3x3x16xf32>) outs(%[[FILTER_T_INIT]] : tensor<16x3x3xf32>) {
+// CHECK-SAME: ins(%[[FILTER]] : tensor<3x3x16xf32>) inits(%[[FILTER_T_INIT]] : tensor<16x3x3xf32>) {
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32):
// CHECK: linalg.yield
// CHECK: } -> tensor<16x3x3xf32>
@@ -110,7 +110,7 @@
// CHECK: %[[OUTPUT_T:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[OUTPUT]] : tensor<1x112x112x16xf32>) outs(%[[INIT_OUTPUT_TENSOR]] : tensor<1x16x112x112xf32>) {
+// CHECK-SAME: ins(%[[OUTPUT]] : tensor<1x112x112x16xf32>) inits(%[[INIT_OUTPUT_TENSOR]] : tensor<1x16x112x112xf32>) {
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32):
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: } -> tensor<1x16x112x112xf32>
@@ -118,7 +118,7 @@
// CHECK: %[[COL_TENSOR:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP4]], #[[MAP5]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[INPUT_T]] : tensor<1x16x114x114xf32>) outs(%[[INIT_COL_TENSOR]] : tensor<1x16x112x112x3x3xf32>) {
+// CHECK-SAME: ins(%[[INPUT_T]] : tensor<1x16x114x114xf32>) inits(%[[INIT_COL_TENSOR]] : tensor<1x16x112x112x3x3xf32>) {
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32):
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: } -> tensor<1x16x112x112x3x3xf32>
@@ -128,14 +128,14 @@
// CHECK-SAME: tensor<16x3x3xf32> into tensor<16x9xf32>
// CHECK: %[[OUTPUT_T_R:.+]] = tensor.collapse_shape %[[OUTPUT_T]]
// CHECK-SAME: tensor<1x16x112x112xf32> into tensor<16x12544xf32>
-// CHECK: %[[BMV_RESULT:.+]] = linalg.batch_matvec ins(%[[COL_TENSOR_R]], %[[FILTER_T_R]] : tensor<16x12544x9xf32>, tensor<16x9xf32>) outs(%[[OUTPUT_T_R]] : tensor<16x12544xf32>) ->
tensor<16x12544xf32> +// CHECK: %[[BMV_RESULT:.+]] = linalg.batch_matvec ins(%[[COL_TENSOR_R]], %[[FILTER_T_R]] : tensor<16x12544x9xf32>, tensor<16x9xf32>) inits(%[[OUTPUT_T_R]] : tensor<16x12544xf32>) -> tensor<16x12544xf32> // CHECK: %[[RESULT_R:.+]] = tensor.expand_shape %[[BMV_RESULT]] // CHECK-SAME: tensor<16x12544xf32> into tensor<1x16x112x112xf32> // CHECK: %[[RESULT_INIT:.+]] = tensor.empty() : tensor<1x112x112x16xf32> // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP6]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[RESULT_R]] : tensor<1x16x112x112xf32>) outs(%[[RESULT_INIT]] : tensor<1x112x112x16xf32>) { +// CHECK-SAME: ins(%[[RESULT_R]] : tensor<1x16x112x112xf32>) inits(%[[RESULT_INIT]] : tensor<1x112x112x16xf32>) { // CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32): // CHECK-NEXT: linalg.yield // CHECK-NEXT: } -> tensor<1x112x112x16xf32> @@ -144,7 +144,7 @@ %0 = linalg.depthwise_conv_2d_nhwc_hwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> - } ins(%input, %filter : tensor<1x114x114x16xf32>, tensor<3x3x16xf32>) outs(%output : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32> + } ins(%input, %filter : tensor<1x114x114x16xf32>, tensor<3x3x16xf32>) inits(%output : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32> return %0 : tensor<1x112x112x16xf32> } @@ -169,7 +169,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[INPUT]] : tensor<8x16x16x4xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x14x14x3x3x4xf32>) +// CHECK-SAME: inits(%[[IT]] : tensor<8x14x14x3x3x4xf32>) // CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2], [3, 4, 5]] : tensor<8x14x14x3x3x4xf32> into tensor<8x196x36xf32> // CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> // CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3]] : tensor<8x14x14x16xf32> into tensor<8x196x16xf32> @@ -177,7 +177,7 @@ // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[CS_INPUT]], %[[CS_FILTER]] : tensor<8x196x36xf32>, tensor<36x16xf32>) -// CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x196x16xf32>) +// CHECK-SAME: inits(%[[CS_RESULT]] : tensor<8x196x16xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 @@ -189,7 +189,7 @@ %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%arg0, %arg1: tensor<8x16x16x4xf32>, tensor<3x3x4x16xf32>) - outs(%arg2: tensor<8x14x14x16xf32>) -> tensor<8x14x14x16xf32> + inits(%arg2: tensor<8x14x14x16xf32>) -> tensor<8x14x14x16xf32> return %0 : tensor<8x14x14x16xf32> } @@ -214,7 +214,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[INPUT]] : tensor<8x4x16x16xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x4x3x3x14x14xf32>) +// CHECK-SAME: inits(%[[IT]] : tensor<8x4x3x3x14x14xf32>) // CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 
3]] : tensor<16x4x3x3xf32> into tensor<16x36xf32> // CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2, 3], [4, 5]] : tensor<8x4x3x3x14x14xf32> into tensor<8x36x196xf32> // CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]] : tensor<8x16x14x14xf32> into tensor<8x16x196xf32> @@ -222,7 +222,7 @@ // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[CS_FILTER]], %[[CS_INPUT]] : tensor<16x36xf32>, tensor<8x36x196xf32>) -// CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x16x196xf32>) +// CHECK-SAME: inits(%[[CS_RESULT]] : tensor<8x16x196xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 @@ -234,7 +234,7 @@ %0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%arg0, %arg1: tensor<8x4x16x16xf32>, tensor<16x4x3x3xf32>) - outs(%arg2: tensor<8x16x14x14xf32>) -> tensor<8x16x14x14xf32> + inits(%arg2: tensor<8x16x14x14xf32>) -> tensor<8x16x14x14xf32> return %0 : tensor<8x16x14x14xf32> } diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -10,7 +10,7 @@ // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]], #[[$MAP]]] // CHECK-SAME: iterator_types = [] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = arith.addf %[[LHS]], %[[RHS]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -29,7 +29,7 @@ // CHECK: linalg.generic // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] %0 = arith.addf %arg0, %arg1 : tensor return %0 : tensor } @@ -42,7 +42,7 @@ func.func @exp(%arg0: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[SCALAR:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = math.exp %[[SCALAR]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -60,7 +60,7 @@ func.func @select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] - // CHECK-SAME: outs(%[[ARG1]] + // CHECK-SAME: inits(%[[ARG1]] // CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32, %{{.*}}: i32): // CHECK: arith.select %[[PRED]], %[[TRUE_VAL]], %[[FALSE_VAL]] : i32 %0 = arith.select %arg0, %arg1, %arg2 : tensor, tensor @@ -78,7 +78,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor @@ -101,7 +101,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty(%[[D1]], %[[D2]], %[[D5]]) : tensor<4x?x?x8x2x?xi1> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // 
CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor<4x?x?x8x2x?xf32> diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -10,7 +10,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32): %4 = arith.addf %arg3, %arg3 : f32 linalg.yield %4 : f32 @@ -41,7 +41,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor // ----- @@ -51,7 +51,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -74,7 +74,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor<4x16x16x32xi32> // ----- @@ -84,7 +84,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -108,7 +108,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor<16x4x32x16xi32> // ----- @@ -118,7 +118,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -142,7 +142,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor<16x4x16x32xi32> // ----- @@ -158,7 +158,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map1, #map2, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -195,7 +195,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]], #[[MAP4]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]], 
%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor // ----- @@ -208,7 +208,7 @@ indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<64xf32>) - outs(%0 : tensor<1x56x57x64xf32>) { + inits(%0 : tensor<1x56x57x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<1x56x57x64xf32> @@ -227,7 +227,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -241,7 +241,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<100x128x200x256xi32>, tensor<100xi32>, tensor<128xi32>) - outs(%init_transpose : tensor<100x200x128x256xi32>) { + inits(%init_transpose : tensor<100x200x128x256xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %0 = arith.addi %b0, %b1 : i32 %1 = arith.addi %0, %b2 : i32 @@ -273,7 +273,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -287,7 +287,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<100x128x200x256xi32>, tensor<100x1x1x1xi32>, tensor<1x128x1x1xi32>) - outs(%init_transpose : tensor<100x200x128x256xi32>) { + inits(%init_transpose : tensor<100x200x128x256xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %0 = arith.addi %b0, %b1 : i32 %1 = arith.addi %0, %b2 : i32 @@ -319,7 +319,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -336,7 +336,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<100x128x200x256xi32>, tensor<100xi32>, tensor<128xi32>) - outs(%init_transpose : tensor<100x200x128x256xi32>) { + inits(%init_transpose : tensor<100x200x128x256xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %0 = arith.addi %b0, %b1 : i32 %1 = arith.addi %0, %b2 : i32 @@ -369,7 +369,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -377,7 +377,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: tensor<128x256xi32>) -> tensor<16x4x32x16xi32>{ %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg4 : i32 linalg.yield %4 : i32 @@ -406,7 +406,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[PACKED_ARG1]] +// CHECK-SAME: inits(%[[PACKED_ARG1]] // ----- @@ -415,7 +415,7 @@ func.func @unpack_on_output(%arg0: 
tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%1 : tensor<12x56x56x64xf32>) { + %2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} inits(%1 : tensor<12x56x56x64xf32>) { ^bb0(%out: f32): %3 = arith.addf %out, %out : f32 linalg.yield %3 : f32 @@ -436,7 +436,7 @@ // CHECK-SAME: into %[[ARG0_EMPTY_PACK]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]]] -// CHECK-SAME: outs(%[[PACKED_ARG0]] +// CHECK-SAME: inits(%[[PACKED_ARG0]] // CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] @@ -448,7 +448,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf32>) -> tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) inits(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %out : f32 linalg.yield %3 : f32 @@ -475,7 +475,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK-SAME: inits(%[[ARG1_PACK]] // CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] @@ -487,7 +487,7 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf16>) -> tensor<12x56x56x64xf16> { %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf16>) { + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) inits(%init : tensor<12x56x56x64xf16>) { ^bb0(%in: f32, %out: f16): %3 = arith.truncf %in : f32 to f16 linalg.yield %3 : f16 @@ -514,7 +514,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK-SAME: inits(%[[ARG1_PACK]] // CHECK: %[[ARG0_NEW_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf16> // CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] @@ -528,7 +528,7 @@ %init = tensor.empty() 
: tensor<12x56x56x64xf32> %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) inits(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %in : f32 linalg.yield %3 : f32 @@ -551,7 +551,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] @@ -629,7 +629,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -647,7 +647,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x256xi32> // CHECK-NEXT: %[[GEN:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] +// CHECK-SAME: inits(%[[EMPTY]] // CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> // CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] @@ -660,7 +660,7 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { %empty_gen = tensor.empty() : tensor<1x7x7x1024xf32> - %gen = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor) outs(%empty_gen : tensor<1x7x7x1024xf32>) { + %gen = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor) inits(%empty_gen : tensor<1x7x7x1024xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<1x7x7x1024xf32> @@ -678,4 +678,4 @@ // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] +// CHECK-SAME: inits(%[[EMPTY]] diff --git a/mlir/test/Dialect/Linalg/decompose-ops.mlir b/mlir/test/Dialect/Linalg/decompose-ops.mlir --- a/mlir/test/Dialect/Linalg/decompose-ops.mlir +++ b/mlir/test/Dialect/Linalg/decompose-ops.mlir @@ -15,7 +15,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2 : tensor, tensor) { + inits(%init1, %init2 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -41,7 +41,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: 
inits(%[[INIT1]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -55,7 +55,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B6:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 @@ -87,7 +87,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -98,7 +98,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B3:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f32 @@ -124,7 +124,7 @@ affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2, %init2 : tensor, tensor, tensor) { + inits(%init1, %init2, %init2 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32, %b5 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -150,7 +150,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -165,7 +165,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#3 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B8:[a-zA-Z0-9_]+]]: f32 @@ -195,7 +195,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -206,7 +206,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]]#0 : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: 
f32 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f32 @@ -226,7 +226,7 @@ indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<10x20xf32>, tensor<10xi32>) - outs(%init : tensor<20x10xf64>) { + inits(%init : tensor<20x10xf64>) { ^bb0(%b0 : f32, %b1 : i32, %b2 : f64): %1 = arith.sitofp %b1 : i32 to f64 %2 = arith.extf %b0 : f32 to f64 @@ -248,7 +248,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: f32 // CHECK-SAME: %[[B1:.+]]: i32 @@ -260,7 +260,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1 : -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B4:.+]]: f32 // CHECK-SAME: %[[B5:.+]]: i32 @@ -273,7 +273,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP0]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1, %[[GENERIC1]]#1 : -// CHECK-SAME: outs(%[[INIT0]] : +// CHECK-SAME: inits(%[[INIT0]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B9:.+]]: f32 // CHECK-SAME: %[[B10:.+]]: i32 @@ -296,7 +296,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:.+]]: i32 // CANONICALIZECHECK-SAME: %[[B1:.+]]: f64 @@ -306,7 +306,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B2:.+]]: f32 // CANONICALIZECHECK-SAME: %[[B3:.+]]: f64 @@ -316,7 +316,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]], #[[MAP2]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[GENERIC0]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT0]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT0]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f64 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f64 @@ -339,7 +339,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2, %arg3 : tensor, tensor) { + inits(%arg2, %arg3 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b0, %b2 : f32 %2 = arith.mulf %b1, %b3 : f32 @@ -360,7 +360,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]], %[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]], %[[ARG2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: 
f32 // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -373,7 +373,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG9:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[ARG10:[a-zA-Z0-9_]+]]: f32 @@ -397,7 +397,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -407,7 +407,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP1]], #[[MAP1]], #[[MAP3]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -6,7 +6,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -25,7 +25,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -34,7 +34,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.mulf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -43,7 +43,7 @@ %6 = tensor.empty() : tensor %7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%1, %4 : tensor, tensor) - outs(%6 : tensor) { + inits(%6 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.divf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -65,7 +65,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 %3 = arith.mulf %2, %arg4 : f32 @@ -86,7 +86,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = "foreign.do_something"(%arg3, %arg4) {} : (f32, f32) -> f32 linalg.yield %2 : f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir 
b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
@@ -10,7 +10,7 @@
   %3 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
     ins(%arg0_t : tensor<i1>)
-    outs(%2 : tensor<i8>) {
+    inits(%2 : tensor<i8>) {
   ^bb0(%arg2: i1, %arg3: i8):
     %10 = arith.extui %arg2 : i1 to i8
     linalg.yield %10 : i8
@@ -23,7 +23,7 @@
   %7 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
     ins(%arg1_t, %cst : tensor<i32>, tensor<i32>)
-    outs(%6 : tensor<i32>) {
+    inits(%6 : tensor<i32>) {
   ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
     %10 = arith.addi %arg2, %arg3 : i32
     linalg.yield %10 : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_if.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir
@@ -18,7 +18,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -30,7 +30,7 @@
   %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
-    outs(%7 : tensor<i32>) {
+    inits(%7 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %9 = arith.addi %arg0, %arg1 : i32
     linalg.yield %9 : i32
@@ -79,7 +79,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -91,7 +91,7 @@
   %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
-    outs(%7 : tensor<i32>) {
+    inits(%7 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %9 = arith.addi %arg0, %arg1 : i32
     linalg.yield %9 : i32
@@ -142,7 +142,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -159,7 +159,7 @@
   %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %12 : tensor<i32>, tensor<i32>)
-    outs(%7 : tensor<i32>) {
+    inits(%7 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %9 = arith.addi %arg0, %arg1 : i32
     linalg.yield %9 : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
@@ -15,7 +15,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%farg0, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir
@@ -15,7 +15,7 @@
   %1 = tensor.empty() : tensor<i1>
   %2 = linalg.generic #attrs
     ins(%0, %farg1 : tensor<i32>, tensor<i32>)
-    outs(%1 : tensor<i1>) {
+    inits(%1 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -27,7 +27,7 @@
   %5 = tensor.empty() : tensor<i32>
   %6 = linalg.generic #attrs
     ins(%4, %4 : tensor<i32>, tensor<i32>)
-    outs(%5 : tensor<i32>) {
+    inits(%5 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %8 = arith.addi %arg0, %arg1 : i32
     linalg.yield %8 : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
@@ -28,7 +28,7 @@
   %1 = tensor.empty() : tensor<i32>
   %2 = linalg.generic #sum_reduction_attrs
     ins(%0: tensor<10xi32>)
-    outs(%1: tensor<i32>) {
+    inits(%1: tensor<i32>) {
       ^bb(%a: i32, %x: i32):
         %b = arith.addi %x, %a : i32
         linalg.yield %b : i32
@@ -37,7 +37,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %farg1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -49,7 +49,7 @@
   %7 = tensor.empty() : tensor<10xi32>
   %9 = linalg.generic #broadcast_attrs
        ins(%6: tensor<i32>)
-       outs(%7: tensor<10xi32>) {
+       inits(%7: tensor<10xi32>) {
     ^bb(%a: i32, %b: i32) :
       linalg.yield %a : i32
   } -> tensor<10xi32>
@@ -67,7 +67,7 @@
 // DET-ALL: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>)
 // DET-ALL: tensor.empty() : tensor<i32>
-// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
+// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor<i32>) {
 // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
 // DET-ALL:   %{{.*}} = arith.addi %{{.*}}, %{{.*}}
 // DET-ALL:   linalg.yield %{{.*}} : i32
@@ -78,7 +78,7 @@
 // DET-ALL: ^[[bb2]](%{{.*}}: i32)
 // DET-ALL: tensor.from_elements %{{.*}} : tensor<i32>
 // DET-ALL: tensor.empty() : tensor<10xi32>
-// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
+// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) inits(%{{.*}} : tensor<10xi32>) {
 // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
 // DET-ALL:   linalg.yield %{{.*}} : i32
 // DET-ALL: } -> tensor<10xi32>
@@ -92,12 +92,12 @@
 // DET-CF-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
 // DET-CF: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-CF: ^bb1(%{{.*}}: tensor<10xi32>)
-// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
+// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor<i32>) {
 // DET-CF: tensor.extract %{{.*}}[] : tensor<i32>
 // DET-CF: cmpi slt, %{{.*}}, %{{.*}} : i32
 // DET-CF: cf.cond_br %{{.*}}, ^bb2(%{{.*}} : tensor<i32>), ^bb3(%{{.*}} : tensor<i32>)
 // DET-CF: ^bb2(%{{.*}}: tensor<i32>)
-// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
+// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) inits(%{{.*}} : tensor<10xi32>) {
 // DET-CF: cf.br ^bb1(%{{.*}} : tensor<10xi32>)
 // DET-CF: ^bb3(%{{.*}}: tensor<i32>)
 // DET-CF: return %{{.*}} : tensor<i32>
diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
@@ -20,7 +20,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %reshaped1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -32,7 +32,7 @@
   %7 = tensor.empty() : tensor
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor, tensor)
-    outs(%7 : 
tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -16,7 +16,7 @@ func.func @drop_one_trip_loops(%arg0 : tensor, %arg1 : f32, %shape: tensor) -> tensor { %0 = linalg.generic #trait ins(%arg0, %arg1 : tensor, f32) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } -> tensor @@ -62,7 +62,7 @@ func.func @drop_one_trip_loops_all_ones(%arg0 : tensor<1x1x1xf32>, %arg1 : f32, %shape: tensor) -> tensor { %0 = linalg.generic #trait ins(%arg0, %arg1 : tensor<1x1x1xf32>, f32) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } -> tensor @@ -96,7 +96,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape: tensor) { + inits(%shape: tensor) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -142,7 +142,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -169,7 +169,7 @@ (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{ %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xi32>) - outs(%arg0 : tensor<1x1xi32>) { + inits(%arg0 : tensor<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -202,7 +202,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> @@ -236,7 +236,7 @@ %1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32> %2 = linalg.generic #trait ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) - outs(%shape : tensor<5x5xf32>) { + inits(%shape : tensor<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -270,7 +270,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -296,7 +296,7 @@ %1 = tensor.empty() : tensor<1x2x5xf32> %2 = linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) { + ins(%arg0 : tensor<5xf32>) inits(%1 : tensor<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<1x2x5xf32> @@ -313,11 +313,11 @@ func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> { %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1xf32>) -> tensor<1xf32> %add = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : tensor<1x1000xf32>)outs(%fill : tensor<1xf32>) { + ins(%input : 
tensor<1x1000xf32>)inits(%fill : tensor<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -334,12 +334,12 @@ // CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor +// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : tensor<1000xf32>) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[GENERIC_RESHAPE:.+]] = tensor.expand_shape %[[GENERIC]] [] : tensor into tensor<1xf32> // CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32> @@ -378,13 +378,13 @@ %c3 = arith.constant 3 : index %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32> %1 = tensor.empty(%0) : tensor<1x?xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x?xf32>) - outs(%2 : tensor<1x?xf32>) { + inits(%2 : tensor<1x?xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -397,12 +397,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -412,13 +412,13 @@ %cst = arith.constant 1.000000e+00 : f32 %c3 = arith.constant 3 : index %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x1xf32>) - outs(%2 : tensor<1x1xf32>) { + inits(%2 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -430,13 +430,13 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3] // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[INIT2:.+]] = tensor.empty() : tensor<1xf32> // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = 
[#[[MAP2]], #[[MAP2]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[RESHAPE]], %[[FILL]] : tensor, tensor<1xf32>) -// CHECK-SAME: outs(%[[INIT2]] : tensor<1xf32>) +// CHECK-SAME: inits(%[[INIT2]] : tensor<1xf32>) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -447,13 +447,13 @@ %c2 = arith.constant 2 : index %0 = tensor.dim %arg0, %c2 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -466,12 +466,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -516,7 +516,7 @@ func.func @drop_one_trip_loops(%arg0 : memref, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -549,7 +549,7 @@ { linalg.generic #trait ins(%arg0 : memref) - outs(%shape: memref) { + inits(%shape: memref) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -595,7 +595,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg0 : memref<1x1xf32>) { + inits(%arg0 : memref<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } @@ -622,7 +622,7 @@ (%arg0 : memref<1x1xi32>) -> memref<1x1xi32>{ linalg.generic #trait ins(%arg0 : memref<1x1xi32>) - outs(%arg0 : memref<1x1xi32>) { + inits(%arg0 : memref<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -655,7 +655,7 @@ func.func @leading_dim_1_canonicalization(%arg0: memref<1x5xf32>, %shape: memref<5xf32>) -> memref<5xf32> { linalg.generic #trait ins(%arg0 : memref<1x5xf32>) - outs(%shape : memref<5xf32>) { + inits(%shape : memref<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } @@ -689,7 +689,7 @@ %1 = memref.expand_shape %arg1 [[0, 1]] : memref<5xf32> into memref<5x1xf32> linalg.generic #trait ins(%0, %1 : memref<1x5xf32>, memref<5x1xf32>) - outs(%shape : memref<5x5xf32>) { + inits(%shape : memref<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -723,7 +723,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } @@ -749,7 +749,7 @@ %1 = memref.alloc() : 
memref<1x2x5xf32> linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) { + ins(%arg0 : memref<5xf32>) inits(%1 : memref<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } @@ -761,7 +761,7 @@ // CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1x2x5xf32> // CHECK: %[[OUT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: linalg.generic -// CHECK-SAME: outs(%[[OUT:.*]] : +// CHECK-SAME: inits(%[[OUT:.*]] : // CHECK: %[[RESULT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: return %[[RESULT]] @@ -773,7 +773,7 @@ linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : memref<1x1000xf32>)outs(%init : memref<1xf32>) { + ins(%input : memref<1x1000xf32>)inits(%init : memref<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -793,7 +793,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : memref<1000xf32>) -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : memref) +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : memref) // CHECK: return %[[INIT:.+]] : memref<1xf32> @@ -816,7 +816,7 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref>, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -836,7 +836,7 @@ // CHECK-SAME: {indexing_maps = [#[[MAP1]], #[[MAP2]], #[[MAP3]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref>, f32) -// CHECK-SAME: outs(%[[OUT]] : memref) { +// CHECK-SAME: inits(%[[OUT]] : memref) { // CHECK: ^bb0(%{{.*}}: f32, %[[ARG:.*]]: f32, %{{.*}}: f32): // CHECK: linalg.yield %[[ARG]] : f32 // CHECK: } @@ -860,7 +860,7 @@ %0 = tensor.empty() : tensor<8xf32> %1 = linalg.generic #matvec ins(%arg0, %arg1: tensor<8x8xf32, #CSR>, tensor<8xf32>) - outs(%0: tensor<8xf32>) { + inits(%0: tensor<8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %m = arith.mulf %a, %b : f32 %add = arith.addf %x, %m : f32 @@ -882,7 +882,7 @@ %0 = tensor.empty() : tensor<4x2xf32> %res = scf.forall (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) { %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> scf.forall.in_parallel { // CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}} // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<4x2xf32> @@ -907,7 +907,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32, 3>) - outs(%arg0 : memref<1x1xf32, 3>) { + inits(%arg0 : memref<1x1xf32, 3>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } diff --git a/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir b/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir --- a/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir +++ b/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir @@ -5,7 +5,7 @@ // CHECK-SAME: (%[[ARG0:.*]]: tensor) -> tensor { // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: 
inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @remove_deadargs_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index @@ -26,7 +26,7 @@ // CHECK-LABEL: func @remove_deadargs_generic_mixedaccess // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor) -> (tensor) { @@ -62,7 +62,7 @@ // CHECK: arith.addf %[[BBARG]], %[[BBARG]] %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) attrs = {someattr} { + inits(%arg0 : tensor) attrs = {someattr} { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -85,7 +85,7 @@ // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]] %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -110,7 +110,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]]) %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0, %arg0 : tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -132,7 +132,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]]) %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1, %arg0, %arg1 : tensor, tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -154,7 +154,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %arg0, %arg0 + inits(%arg0, %arg0, %arg0, %arg0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -169,7 +169,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -186,7 +186,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["reduction"]} ins(%arg0 : tensor) - outs(%init0, %init1 : tensor, tensor) { + inits(%init0, %init1 : tensor, tensor) { ^bb0(%b0: f32, %b1: f32, %b2: i32): %8 = linalg.index 0 : index %9 = arith.index_cast %8 : index to i32 @@ -205,7 +205,7 @@ // CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor // CHECK: %[[GENERIC:.+]]:2 = linalg.generic -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK: return %[[GENERIC]]#1 // ----- @@ -214,11 +214,11 @@ func.func 
@loop_dim_operand(%arg0 : tensor) -> tensor { %cst = arith.constant 0 : i32 %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : tensor) -> tensor %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} - ins(%arg0 : tensor) outs(%fill : tensor) { + ins(%arg0 : tensor) inits(%fill : tensor) { ^bb0(%b0: f32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -239,11 +239,11 @@ %cst = arith.constant 0 : i32 %init1 = tensor.empty(%arg0) : tensor %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : tensor) -> tensor %0:2 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["parallel"]} - outs(%init1, %fill : tensor, tensor) { + inits(%init1, %fill : tensor, tensor) { ^bb0(%b0: i32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -256,7 +256,7 @@ // CHECK-SAME: %[[ARG0:.+]]: index // CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG0]]) // CHECK: linalg.generic -// CHECK-SAME: outs(%[[INIT]] +// CHECK-SAME: inits(%[[INIT]] // ----- @@ -271,7 +271,7 @@ iterator_types = ["parallel", "reduction"]} ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3 : tensor, tensor, tensor, tensor, tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32): %1 = arith.addi %b0, %b1 : i32 %2 = arith.addi %1, %b2 : i32 @@ -297,7 +297,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] : -// CHECK-SAME: outs(%[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]] : // CHECK: ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: i32 // CHECK-SAME: %[[B2:[a-zA-Z0-9_]+]]: i32 @@ -323,7 +323,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0 : tensor, tensor) { + inits(%arg0, %arg0 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %1 = arith.addf %b0, %b0 : f32 linalg.yield %1, %1 : f32, f32 @@ -333,7 +333,7 @@ // CHECK: func @drop_redundant_results // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK: %[[GENERIC:.+]] = linalg.generic -// CHECK-SAME: outs(%[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]] : // CHECK: return %[[GENERIC]] // ----- @@ -357,7 +357,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32 @@ -372,7 +372,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -396,7 +396,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, 
tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -414,7 +414,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -437,7 +437,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : linalg.yield %b2, %b1, %b3 : f32, f32, f32 @@ -452,7 +452,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 // ----- @@ -475,7 +475,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b1, %b2: f32 @@ -492,7 +492,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 @@ -505,7 +505,7 @@ -> tensor { // CHECK-INPUT: %[[result:.*]] = linalg.generic {indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel"]} - // CHECK-INPUT-SAME: ins(%[[a]] : tensor) outs(%[[b]] : tensor) { + // CHECK-INPUT-SAME: ins(%[[a]] : tensor) inits(%[[b]] : tensor) { // CHECK-INPUT: ^bb0(%[[in:.*]]: f32, %[[out:.*]]: f32): // CHECK-INPUT: %[[add:.*]] = arith.addf %[[in]], %[[out]] // CHECK-INPUT: linalg.yield %[[add]] @@ -513,7 +513,7 @@ // CHECK-INPUT: return %[[result]] %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} - ins(%a, %b : tensor, tensor) outs(%b : tensor) { + ins(%a, %b : tensor, tensor) inits(%b : tensor) { ^bb0(%in: f32, %in_2: f32, %out: f32): %16 = arith.addf %in, %in_2 : f32 linalg.yield %16 : f32 diff --git a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir --- a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir +++ b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir @@ -18,7 +18,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map0, #map0, #map1], iterator_types = ["parallel"]} - ins(%arg0, %arg0 : f32, f32) outs(%arg1 : tensor) { + ins(%arg0, %arg0 : f32, f32) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -34,7 +34,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%arg1 : tensor) { + ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -15,7 +15,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape : tensor) { + 
inits(%shape : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : linalg.yield %arg1 : f32 } -> tensor @@ -42,7 +42,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -68,7 +68,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg1 : memref<1x1xf32>) { + inits(%arg1 : memref<1x1xf32>) { ^bb0(%arg2: f32, %arg3 : f32) : linalg.yield %arg2 : f32 } @@ -96,7 +96,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -29,7 +29,7 @@ %c0 = arith.constant 0: index %f0 = arith.constant 0.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> @@ -69,7 +69,7 @@ %alloc = memref.alloc() : memref<128 x i8> %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.fill ins(%f0 : f32) outs(%view : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%view : memref<32 x f32>) memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> @@ -129,7 +129,7 @@ %f0 = arith.constant 0.0: f32 %f1 = arith.constant 1.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir --- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir +++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir @@ -16,7 +16,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -39,7 +39,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] 
: -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -67,7 +67,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -129,7 +129,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<9x7x8x2x3x4x5x6xi32>, tensor<7x8x2xi32>, tensor<6x3x4x5xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -153,7 +153,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -181,7 +181,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -229,7 +229,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "reduction", "parallel"]} - ins(%0 : tensor<2x6x?x5xf32>) outs(%arg1 : tensor<2x5xf32>) { + ins(%0 : tensor<2x6x?x5xf32>) inits(%arg1 : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -245,7 +245,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : tensor<2x?x5xf32>) -// CHECK-SAME: outs(%[[ARG1]] : tensor<2x5xf32>) +// CHECK-SAME: inits(%[[ARG1]] : tensor<2x5xf32>) // ----- @@ -258,7 +258,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -285,7 +285,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) outs(%init : tensor<2x4x3x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) inits(%init : tensor<2x4x3x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = 
arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -312,7 +312,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -341,7 +341,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %3 = arith.addf %b0, %b1 : f32 linalg.yield %3 : f32 @@ -358,7 +358,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%{{.+}}: tensor<6x20xf32>) +// CHECK-SAME: inits(%{{.+}}: tensor<6x20xf32>) // CHECK: %[[RESHAPE1:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1, 2]{{\]}} // CHECK: %[[RESHAPE2:.+]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1], [2], [3]{{\]}} // CHECK: return %[[RESHAPE2]] @@ -374,7 +374,7 @@ // CONTROL: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]{{\]}} // CONTROL: %[[GENERIC:.+]] = linalg.generic // CONTROL-SAME: ins(%[[EXPAND]], %[[ARG1]] : -// CONTROL-SAME: outs(%[[INIT_RESHAPE]] : +// CONTROL-SAME: inits(%[[INIT_RESHAPE]] : // CONTROL: %[[RESULT:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // ----- @@ -387,7 +387,7 @@ %1 = linalg.generic { indexing_maps = [#map, #map], iterator_types = ["parallel"]} - ins(%0: tensor<1xf32>) outs(%init : tensor<1xf32>) { + ins(%0: tensor<1xf32>) inits(%init : tensor<1xf32>) { ^bb0(%b0 : f32, %b1 : f32): linalg.yield %b0: f32 } -> tensor<1xf32> @@ -410,7 +410,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0, %arg1 : tensor, tensor<4x?x?x8xf32>) - outs(%arg1 : tensor<4x?x?x8xf32>) { + inits(%arg1 : tensor<4x?x?x8xf32>) { ^bb0(%b0: f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -430,7 +430,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[COLLAPSE_ARG0]], %[[COLLAPSE_ARG1_0]] : -// CHECK-SAME: outs(%[[COLLAPSE_ARG1_1]] : +// CHECK-SAME: inits(%[[COLLAPSE_ARG1_1]] : // CHECK: %[[EXPAND_GENERIC:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // CHECK: return %[[EXPAND_GENERIC]] @@ -448,7 +448,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -474,7 +474,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : -// CHECK-SAME: outs(%[[COLLAPSE_INIT]] : +// CHECK-SAME: inits(%[[COLLAPSE_INIT]] : // CHECK-NEXT: ^bb{{[0-9]}} // CHECK: %[[ID0:.+]] = linalg.index 0 // CHECK-DAG: %[[T0:.+]] = arith.remui %[[ID0]], %[[C4]] @@ -504,7 +504,7 @@ %1 = linalg.generic { 
indexing_maps = [#map0, #map1], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -13,7 +13,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -22,7 +22,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] @@ -55,7 +55,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -64,7 +64,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP1]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG3:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG4:%[a-zA-Z0-9_]*]] @@ -97,7 +97,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -106,7 +106,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -131,7 +131,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -140,7 +140,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor){ + inits(%2 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -165,7 +165,7 @@ %1 = tensor.empty(%0) : tensor %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} ins(%arg0, 
%arg1 : tensor, tensor) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -176,7 +176,7 @@ %4 = tensor.empty(%0, %3) : tensor %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %arg2 : tensor, tensor) - outs(%4 : tensor){ + inits(%4 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %6 = arith.mulf %arg5, %arg6 : f32 linalg.yield %6 : f32 @@ -195,7 +195,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%arg0, %arg1 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -205,7 +205,7 @@ // CHECK: arith.mulf %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.mulf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -231,7 +231,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -263,7 +263,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -291,7 +291,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -300,7 +300,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -342,7 +342,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg4: i32, %arg5: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -356,7 +356,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3, %arg0 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -396,7 +396,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -410,7 +410,7 @@ indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -457,7 +457,7 @@ %1 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%0 : tensor) { + ins(%arg0 : tensor) inits(%0 : tensor) { ^bb0(%arg2 : i32, %arg3 : 
i32): %2 = linalg.index 0 : index %3 = arith.index_cast %2 : index to i32 @@ -471,7 +471,7 @@ {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg1, %1 : tensor, tensor) - outs(%4 : tensor) { + inits(%4 : tensor) { ^bb0(%arg2 : i32, %arg3 : i32, %arg4: i32): %6 = arith.addi %arg2, %arg3 : i32 linalg.yield %6 : i32 @@ -503,7 +503,7 @@ %1 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg1 : tensor) outs(%0 : tensor) { + ins(%arg1 : tensor) inits(%0 : tensor) { ^bb0(%arg2: i32, %arg3: f32): %3 = arith.index_cast %arg2 : i32 to index %4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32> @@ -514,7 +514,7 @@ {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - ins(%1, %cst : tensor, tensor<10xf32>) outs(%2 : tensor<10xf32>) { + ins(%1, %cst : tensor, tensor<10xf32>) inits(%2 : tensor<10xf32>) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %4 = arith.mulf %arg2, %arg3 : f32 linalg.yield %4 : f32 @@ -559,7 +559,7 @@ // CHECK: %[[T1:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0]] : tensor<4xf32>) -// CHECK-SAME: outs(%[[T0]] : tensor<4xf32>) +// CHECK-SAME: inits(%[[T0]] : tensor<4xf32>) // CHECK: ^{{.+}}( // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: f32, %[[ARG2:[a-zA-Z0-9_]+]]: f32) // CHECK: %[[T2:.+]] = arith.addf %[[ARG1]], %[[CST]] @@ -579,7 +579,7 @@ {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<1x10xf32>, tensor<1x10xf32>) - outs(%init : tensor<1x10xf32>) { + inits(%init : tensor<1x10xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -588,7 +588,7 @@ {indexing_maps = [#map1, #map2], iterator_types = ["reduction"]} ins(%0 : tensor<1x10xf32>) - outs(%arg2 : tensor<1xf32>) { + inits(%arg2 : tensor<1xf32>) { ^bb0(%arg3: f32, %arg4: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -625,7 +625,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } - outs(%init0 : tensor) { + inits(%init0 : tensor) { ^bb0(%a: f32): linalg.yield %cp5 : f32 } -> tensor @@ -638,7 +638,7 @@ iterator_types = ["parallel", "parallel"] } ins(%0, %1 : tensor, tensor) - outs(%init1 : tensor) { + inits(%init1 : tensor) { ^bb0(%a: f32, %b: f32, %c: f32): %m = arith.mulf %a, %b : f32 linalg.yield %m : f32 @@ -656,7 +656,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} - outs(%arg0 : tensor<1x8xf64>) { + inits(%arg0 : tensor<1x8xf64>) { ^bb0(%a: f64): %r = func.call @compute1(%a) : (f64) -> f64 linalg.yield %r : f64 @@ -672,7 +672,7 @@ indexing_maps = [affine_map<(i, j) -> (i, j)>, affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<1x8xf64>) - outs(%arg1 : tensor<1x8xi32>) { + inits(%arg1 : tensor<1x8xi32>) { ^bb0(%a: f64, %b: i32): %r = func.call @compute2(%a, %b) : (f64, i32) -> i32 linalg.yield %r : i32 @@ -697,7 +697,7 @@ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%three : tensor<3x2xf32>) outs(%init : tensor<3xf32>) { + ins(%three : tensor<3x2xf32>) inits(%init : tensor<3xf32>) { ^bb0(%arg0 : f32, %arg1 : f32): %0 = arith.addf %arg0, %arg1 : f32 linalg.yield %0 : f32 @@ -714,12 +714,12 @@ } func.func @break_outs_dependency(%arg0 : tensor) 
-> tensor { - %0 = linalg.generic #trait ins(%arg0 : tensor) outs(%arg0 : tensor) { + %0 = linalg.generic #trait ins(%arg0 : tensor) inits(%arg0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %1 = arith.addf %arg1, %arg1 : f32 linalg.yield %1 : f32 } -> tensor - %2 = linalg.generic #trait ins(%0 : tensor) outs(%0 : tensor) { + %2 = linalg.generic #trait ins(%0 : tensor) inits(%0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %3 = arith.mulf %arg1, %arg1 : f32 linalg.yield %3 : f32 @@ -734,12 +734,12 @@ // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[GENERIC1:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // CHECK-DAG: %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]] // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[RESULT:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // ----- @@ -760,7 +760,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %cst, %c42 : tensor, f32, i32) - outs(%0, %1 : tensor, tensor) { + inits(%0, %1 : tensor, tensor) { ^bb0(%arg1 : f32, %arg2 : f32, %arg3 : i32, %arg4 : f32, %arg5 : i32) : %3 = arith.addf %arg1, %arg2 : f32 linalg.yield %3, %arg3 : f32, i32 @@ -785,7 +785,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -803,7 +803,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { + } ins(%input : tensor<2x3xf64>) inits(%init : tensor<3x2xf64>) { ^bb0(%arg1: f64, %arg2: f64): linalg.yield %arg1 : f64 } -> tensor<3x2xf64> @@ -827,7 +827,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { + } ins(%input : tensor<1x2x3x4xi32>) inits(%init : tensor<3x1x4x2xi32>) { ^bb0(%arg1: i32, %arg2: i32): linalg.yield %arg1 : i32 } -> tensor<3x1x4x2xi32> @@ -851,7 +851,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { + } ins(%input : tensor<1x2x3x4xi16>) inits(%init : tensor<3x1x4x2xi16>) { ^bb0(%arg1: i16, %arg2: i16): linalg.yield %arg1 : i16 } -> tensor<3x1x4x2xi16> @@ -867,7 +867,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -884,7 +884,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, 
affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %cst : f32 } -> tensor<3x2xf32> @@ -900,7 +900,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): %add = arith.addf %arg1, %arg1 : f32 linalg.yield %add : f32 @@ -929,16 +929,16 @@ %5 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"] - } ins(%arg0 : tensor) outs(%4 : tensor) { + } ins(%arg0 : tensor) inits(%4 : tensor) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor %6 = tensor.empty(%arg1) : tensor - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor + %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor) -> tensor %8 = linalg.generic { indexing_maps = [#map2, #map3], iterator_types = ["parallel", "reduction"] - } ins(%5 : tensor) outs(%7 : tensor) { + } ins(%5 : tensor) inits(%7 : tensor) { ^bb0(%arg2: f32, %arg3: f32): %9 = arith.maxf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -953,7 +953,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%arg0 : tensor<5000xi64>) { + inits(%arg0 : tensor<5000xi64>) { ^bb0(%arg3: i64): // no predecessors %22 = linalg.index 0 : index %23 = arith.index_cast %22 : index to i64 @@ -963,7 +963,7 @@ %2 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>], iterator_types = ["parallel", "parallel"]} - ins(%0 : tensor<5000xi64>) outs(%1 : tensor<5000xi32>) { + ins(%0 : tensor<5000xi64>) inits(%1 : tensor<5000xi32>) { ^bb0(%arg3: i64, %arg5: i32): // no predecessors %22 = arith.index_cast %arg3 : i64 to index %23 = tensor.extract %arg1[%22] : tensor<5000xi32> @@ -980,7 +980,7 @@ // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<5000xi32> // CHECK: %[[RESULT:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: i64 // CHECK-SAME: %[[B1:.+]]: i32 @@ -998,14 +998,14 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @fold_fill_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = tensor.empty(%0) : tensor - %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) inits (%3:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): @@ -1022,14 +1022,14 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @fold_fill_generic_different_dtype(%arg0: tensor) -> 
(tensor) { %c0 = arith.constant 0 : index %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = tensor.empty(%0) : tensor - %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) inits (%3:tensor) { ^bb0(%arg1: f16, %arg2: f16, %arg3: f16): @@ -1045,7 +1045,7 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @fold_fill_generic_mixedaccess(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index %c1 = arith.constant 0 : index %cst1 = arith.constant 7.0 : f32 %cst2 = arith.constant 8.0 : f32 @@ -1056,9 +1056,9 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.dim %arg0, %c1 : tensor %2 = tensor.empty(%0, %1) : tensor - %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor) -> tensor + %3 = linalg.fill ins(%cst1 : f32) inits(%2 : tensor) -> tensor %4 = tensor.empty(%1, %0) : tensor - %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst2 : f32) inits(%4 : tensor) -> tensor %6 = tensor.empty(%0, %1) : tensor - %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) outs (%6:tensor) { + %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) inits (%6:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): @@ -1077,7 +1077,7 @@ %1 = tensor.empty() : tensor %2:2 = linalg.generic { indexing_maps = [#map, #map, #map, #map, #map], iterator_types = []} - ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) outs(%0, %1 : tensor, tensor) { + ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) inits(%0, %1 : tensor, tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32, %arg8: f32, %arg9: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg7 : f32 @@ -1085,7 +1085,7 @@ } -> (tensor, tensor) %3 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = []} - ins(%2#1, %arg1 : tensor, tensor) outs(%arg4 : tensor) { + ins(%2#1, %arg1 : tensor, tensor) inits(%arg4 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg6 : f32 @@ -1100,7 +1100,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT]] : +// CHECK-SAME: inits(%[[INIT]] : // CHECK-NEXT: ^bb0 // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -1126,7 +1126,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -1135,7 +1135,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%arg8 : memref) { + inits(%arg8 : memref) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir +++ 
b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir @@ -20,28 +20,28 @@ %init = tensor.empty(%d0, %d1) : tensor %0 = linalg.generic #binary2Dpointwise ins(%arg0, %arg1 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %1 = arith.mulf %arg6, %arg7 : f32 linalg.yield %1 : f32 } -> tensor %2 = linalg.generic #binary2Dpointwise ins(%arg2, %arg3 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %3 = arith.mulf %arg6, %arg7 : f32 linalg.yield %3 : f32 } -> tensor %4 = linalg.generic #binary2Dpointwise ins(%arg4, %arg5 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %5 = arith.mulf %arg6, %arg7 : f32 linalg.yield %5 : f32 } -> tensor %6 = linalg.generic #ternary2Dpointwise ins(%0, %2, %4 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32, %arg9 : f32): %7 = arith.addf %arg6, %arg7 : f32 %8 = arith.addf %7, %arg8 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise.mlir @@ -6,7 +6,7 @@ %0:2 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} - ins(%arg0 : tensor) outs(%arg0, %arg0 : tensor, tensor) { + ins(%arg0 : tensor) inits(%arg0, %arg0 : tensor, tensor) { ^bb0(%b0: f32, %b1: f32, %b2: f32): %1 = arith.addf %b0, %b0 : f32 %2 = arith.mulf %b0, %b0 : f32 @@ -15,7 +15,7 @@ %3 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} - ins(%0#0, %arg1 : tensor, tensor) outs(%arg0 : tensor) { + ins(%0#0, %arg1 : tensor, tensor) inits(%arg0 : tensor) { ^bb0(%b0: f32, %b1: f32, %b2: f32): %4 = arith.subf %b0, %b1 : f32 linalg.yield %4 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir b/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir --- a/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir +++ b/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir @@ -8,7 +8,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg1, %arg2 : tensor, tensor) { + inits(%arg1, %arg2 : tensor, tensor) { ^bb0(%b0: f32, %b1 : f32, %b2 : f32): %1 = arith.addf %b0, %b1 : f32 linalg.yield %1, %1 : f32, f32 @@ -17,7 +17,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%0#1, %arg3 : tensor, tensor) - outs(%arg4 : tensor) { + inits(%arg4 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %3 = arith.mulf %b0, %b1 : f32 linalg.yield %3 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir --- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir +++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir @@ -8,7 +8,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) outs(%[[RI]] : tensor) +// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) inits(%[[RI]] : tensor) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: 
return %[[RR]] : tensor func.func @reshape(%A: tensor, %B: tensor<16xf32>, %init: tensor) -> tensor { @@ -19,7 +19,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %B : tensor, tensor<16xf32>) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf %arg1, %arg2 : f32 linalg.yield %s : f32 @@ -38,7 +38,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<12544x16xf32>) +// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) inits(%[[RI]] : tensor<12544x16xf32>) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32> // CHECK: return %[[RR]] : tensor<112x112x16xf32> func.func @reshape_multiple(%A: tensor<12544x16xf32>, %B: tensor<12544x16xf32>, @@ -55,7 +55,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %1, %C : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>) - outs(%2 : tensor<112x112x16xf32>) { + inits(%2 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg1, %arg2 : f32 %m = arith.mulf %s, %arg3 : f32 @@ -81,7 +81,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%20, %B : tensor<112x112x16xf32>, tensor<112xf32>) - outs(%21 : tensor<112x112x16xf32>) { + inits(%21 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf %arg1, %arg2 : f32 linalg.yield %s : f32 @@ -106,7 +106,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25, %arg1, %arg2 : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>) - outs(%26 : tensor<2x3x5xf32>) { + inits(%26 : tensor<2x3x5xf32>) { ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32): %29 = arith.sitofp %arg6 : i32 to f32 %30 = arith.addf %arg7, %cst_8 : f32 @@ -121,6 +121,6 @@ // CHECK-LABEL: func @type_correctness // CHECK: %[[OP:.+]] = linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}}, %{{.+}} : tensor<6x5xi32>, tensor<5xf32>, tensor<5xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<6x5xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<6x5xf32>) // CHECK: tensor.expand_shape %[[OP]] // CHECK-SAME: tensor<6x5xf32> into tensor<2x3x5xf32> diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -2,7 +2,7 @@ func.func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: memref<16x32xf32>) { linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) - outs(%C: memref<16x32xf32>) + inits(%C: memref<16x32xf32>) return } @@ -20,7 +20,7 @@ // CHECK-SAME: indexing_maps = [#[[A_MAP]], #[[B_MAP]], #[[C_MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A]], %[[B]] -// CHECK-SAME: outs(%[[C]] +// CHECK-SAME: inits(%[[C]] // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, 
%[[C_ARG:.+]]: f32) // CHECK: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -31,7 +31,7 @@ func.func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -39,7 +39,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xf32>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -54,7 +54,7 @@ %C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xcomplex>, tensor<8x32xcomplex>) - outs(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> + inits(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> return %0: tensor<16x32xcomplex> } @@ -62,7 +62,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xcomplex>, tensor<8x32xcomplex>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xcomplex>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xcomplex>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: complex, %[[B_ARG:.+]]: complex, %[[C_ARG:.+]]: complex) // CHECK-NEXT: %[[MUL:.+]] = complex.mul %[[A_ARG]], %[[B_ARG]] : complex @@ -76,7 +76,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x3x4x2x3xf32>) + inits(%output : memref<2x3x4x2x3xf32>) return } @@ -90,7 +90,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -103,7 +103,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x2x3x2x3xf32>) + inits(%output : memref<2x2x3x2x3xf32>) return } @@ -117,7 +117,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x2x3x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x2x3x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -129,7 +129,7 @@ func.func @depthwise_conv_2d_nhwc_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: 
memref<1x56x56x96xf32>) return } @@ -143,7 +143,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -169,7 +169,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -195,7 +195,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -205,7 +205,7 @@ // ----- func.func @generalize_fill(%output: memref, %value : f32) { - linalg.fill ins(%value : f32) outs(%output : memref) + linalg.fill ins(%value : f32) inits(%output : memref) return } @@ -219,7 +219,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"]} // CHECK-SAME: ins(%[[VAL]] : f32) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32) // CHECK-NEXT: linalg.yield %[[BBARG0]] : f32 @@ -228,7 +228,7 @@ func.func @generalize_batch_matm_vec(%lhs : memref, %rhs: memref, %out: memref) { linalg.batch_matvec ins(%lhs, %rhs: memref, memref) - outs(%out: memref) + inits(%out: memref) return } // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -241,7 +241,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: f32) // CHECK: %[[BBARG0_F32:.+]] = arith.sitofp %[[BBARG0]] : i8 to f32 // CHECK: %[[BBARG1_F32:.+]] = arith.sitofp %[[BBARG1]] : i8 to f32 @@ -253,7 +253,7 @@ func.func @batch_reduce_gemm(%lhs: memref<7x8x9xf32>, %rhs: memref<7x9x8xf32>, %out: memref<8x8xf32>) { linalg.batch_reduce_matmul ins(%lhs, %rhs: memref<7x8x9xf32>, memref<7x9x8xf32>) - outs(%out: memref<8x8xf32>) + inits(%out: memref<8x8xf32>) return } @@ -267,7 +267,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<7x8x9xf32>, memref<7x9x8xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<8x8xf32> +// CHECK-SAME: inits(%{{.+}} : memref<8x8xf32> // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK: 
%[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[BBARG2]], %[[MUL]] : f32 diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -3,7 +3,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_f16f64f32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -22,7 +22,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -43,7 +43,7 @@ func.func @generalize_matmul_tensor_i16i64i32_unsigned(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul {cast = #linalg.type_fn} ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -54,7 +54,7 @@ func.func @generalize_matmul_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -67,7 +67,7 @@ func.func @generalize_matmul_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -80,7 +80,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -93,7 +93,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -106,7 +106,7 @@ func.func @generalize_matmul_unsigned_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -119,7 +119,7 @@ func.func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: 
tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -133,7 +133,7 @@ func.func @generalize_pooling_nwc_max_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) inits(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> return %0: tensor<1x4x1xf32> } @@ -147,7 +147,7 @@ func.func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -159,7 +159,7 @@ func.func @generalize_pooling_nwc_max_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -171,7 +171,7 @@ func.func @generalize_pooling_nhwc_max_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -183,7 +183,7 @@ func.func @generalize_pooling_nwc_max_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_max_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -195,7 +195,7 @@ func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, 
tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -209,7 +209,7 @@ func.func @generalize_pooling_nwc_min_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) inits(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> return %0: tensor<1x4x1xf32> } @@ -223,7 +223,7 @@ func.func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -235,7 +235,7 @@ func.func @generalize_pooling_nwc_min_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -247,7 +247,7 @@ func.func @generalize_pooling_nhwc_min_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -259,7 +259,7 @@ func.func @generalize_pooling_nwc_min_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_min_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -271,7 +271,7 @@ func.func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ 
-285,7 +285,7 @@ func.func @generalize_pooling_nwc_sum_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) inits(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> return %0: tensor<1x4x1xf32> } @@ -299,7 +299,7 @@ func.func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -313,7 +313,7 @@ func.func @generalize_pooling_nwc_sum_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -326,7 +326,7 @@ // ----- func.func @generalize_fill_0d(%value: f64, %O: tensor) -> tensor { - %0 = linalg.fill ins(%value: f64) outs(%O : tensor) -> tensor + %0 = linalg.fill ins(%value: f64) inits(%O : tensor) -> tensor return %0: tensor } @@ -340,7 +340,7 @@ // ----- func.func @generalize_fill_2d(%value: f64, %O: memref<16x32xf32>) { - linalg.fill ins(%value: f64) outs(%O : memref<16x32xf32>) + linalg.fill ins(%value: f64) inits(%O : memref<16x32xf32>) return } @@ -355,7 +355,7 @@ // ----- func.func @generalize_index(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -368,7 +368,7 @@ // ----- func.func @generalize_const(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -381,7 +381,7 @@ // Verifies the default value of the fun attribute is an exp op. func.func @generalize_elemwise_exp(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { - %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -393,7 +393,7 @@ // Verifies the fun attribute controls the unary function used. 
func.func @generalize_elemwise_log(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -405,7 +405,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_abs(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -417,7 +417,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_ceil(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -429,7 +429,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_floor(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -441,7 +441,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_negf(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -453,7 +453,7 @@
// Verifies the default value of the fun attribute is an add op.
func.func @generalize_elemwise_add(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_binary ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
- outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -466,7 +466,7 @@
func.func @generalize_elemwise_mul(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_binary {fun = #linalg.binary_fn}
ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
- outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -479,7 +479,7 @@
func.func @generalize_elemwise_rank_zero(%lhs : tensor, %rhs : tensor, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_binary {fun = #linalg.binary_fn}
ins(%lhs, %rhs: tensor, tensor)
- outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -492,7 +492,7 @@
// Verifies the fun attribute controls the binary function used.
func.func @generalize_copy(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
- %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
@@ -4,7 +4,7 @@
// CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
// CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
// CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32>
func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
@@ -29,7 +29,7 @@
// CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
// CHECK: %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32>
// CHECK: %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1_1]], 2, %[[DIM3_1]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir
@@ -20,7 +20,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [1, 0]
// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
@@ -49,7 +49,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
@@ -80,7 +80,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[SUB_ITER:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}}
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<32x8xf32> into tensor<1x1x32x8xf32>
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
@@ -11,7 +11,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<32x8xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x32xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x32xf32>)
// CHECK-SAME: permutation = [1, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 8, 32] [1, 1, 1, 1, 1, 1]
@@ -35,7 +35,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1]
@@ -53,7 +53,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[SRC]] : tensor<32x8xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1]
@@ -71,7 +71,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<5x7x3xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[SRC]] : tensor<3x5x7xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<5x7x3xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<5x7x3xf32>)
// CHECK-SAME: permutation = [1, 2, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 5, 7, 3] [1, 1, 1, 1, 1, 1]
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir
@@ -26,7 +26,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [1, 0]
// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
@@ -58,7 +58,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[UNPACK_TILE:.+]] = tensor.extract_slice %[[TRANSP]]
// CHECK-SAME: [0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]]] [1, 1]
@@ -95,7 +95,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}}
// CHECK-SAME: [%[[K]], %[[C]]] [32, 8] [1, 1]
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir
@@ -11,7 +11,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<8x32xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [1, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1]
@@ -30,7 +30,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// They have the same type, so the insert_slice op is folded
// away.
@@ -50,7 +50,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<32x8xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [0, 1]
// They have the same type, so the insert_slice op is folded
// away.
diff --git a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
--- a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
+++ b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
@@ -12,7 +12,7 @@
%1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
iterator_types = ["parallel"]}
ins(%arg0, %scalar : tensor<4xf32>, tensor)
- outs(%0 : tensor<4xf32>) {
+ inits(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][]
@@ -37,7 +37,7 @@
%1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
iterator_types = ["parallel"]}
ins(%arg0, %scalar : tensor<4xf32>, tensor<1xf32>)
- outs(%0 : tensor<4xf32>) {
+ inits(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][%[[ZERO]]]
diff --git a/mlir/test/Dialect/Linalg/inlining.mlir b/mlir/test/Dialect/Linalg/inlining.mlir
--- a/mlir/test/Dialect/Linalg/inlining.mlir
+++ b/mlir/test/Dialect/Linalg/inlining.mlir
@@ -23,7 +23,7 @@
// CHECK: linalg.generic
linalg.generic #trait
ins(%arg0 : memref)
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0 : f32, %1 : f32) :
%2 = arith.addf %0, %0: f32
linalg.yield %2 : f32
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -36,7 +36,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32):
linalg.index 2 : index
linalg.yield %0 : f32
@@ -50,7 +50,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32):
linalg.index -1 : index
linalg.yield %0 : f32
@@ -74,7 +74,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32):
linalg.yield
}
@@ -87,7 +87,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> (0)> ],
iterator_types = ["parallel"]}
- outs(%arg0 : memref<1xi32>) {
+ inits(%arg0 : memref<1xi32>) {
^bb(%i : i32):
linalg.yield %i : i32
}
@@ -100,7 +100,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["random"]}
- outs(%arg0 : memref<1xi32>) {
+ inits(%arg0 : memref<1xi32>) {
^bb(%i : i32):
linalg.yield %i : i32
}
@@ -113,7 +113,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> (0, 0)> ],
iterator_types = []}
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%f : f32):
linalg.yield %f: f32
}
@@ -128,7 +128,7 @@
indexing_maps = [ affine_map<() -> (0)>, affine_map<() -> (0, 0)> ],
iterator_types = []}
ins(%cst : f32)
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%0 : f32, %1 : f32):
linalg.yield %0: f32
}
@@ -141,7 +141,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%0: f32):
%1 = arith.constant 1: i4
linalg.yield %1: i4
@@ -159,7 +159,7 @@
],
iterator_types = ["parallel","parallel"]}
ins(%arg0 : memref(off + i)>>)
- outs(%arg1 : memref(off + i)>>) {
+ inits(%arg1 : memref(off + i)>>) {
^bb(%0: f32, %1: f32):
linalg.yield %1: f32
}
@@ -178,7 +178,7 @@
indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
iterator_types = []}
ins(%arg0 : memref)
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb1:
linalg.yield %f0: f32
^bb2:
@@ -195,7 +195,7 @@
indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ],
iterator_types = []}
ins(%arg0 : memref)
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
}
}
@@ -206,7 +206,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0, %arg0 : memref, memref) {
+ inits(%arg0, %arg0 : memref, memref) {
^bb(%f: f32):
linalg.yield %f: f32
}
@@ -219,7 +219,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%i: i1):
linalg.yield %i : i1
}
@@ -232,7 +232,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb(%i: i1):
linalg.yield %i : i1
} -> tensor
@@ -245,7 +245,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%i: f32):
%0 = arith.constant 0: i1
linalg.yield %0: i1
@@ -261,7 +261,7 @@
indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
ins(%arg0 : memref(off + i)>>)
- outs(%arg1 : tensor) {
+ inits(%arg1 : tensor) {
^bb(%i: f32, %j: f32):
linalg.yield %i: f32
} -> tensor
@@ -274,7 +274,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i, j) -> (i, j)> ],
iterator_types = ["parallel", "parallel"]}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32) :
%1 = arith.addf %0, %0: f32
}
@@ -298,7 +298,7 @@
func.func @named_ops(%a3: memref, %b3: memref, %c3: memref) {
// expected-error @+1 {{expected operand rank (2) to match the result rank of indexing_map #1 (3)}}
linalg.batch_matmul ins(%a3, %b3: memref, memref)
- outs(%c3 : memref)
+ inits(%c3 : memref)
return
}
@@ -316,7 +316,7 @@
func.func @matching_inits(%m: memref, %t: tensor) {
// expected-error @+1 {{expected type of operand #2 ('tensor') to match type of corresponding result ('tensor')}}
%res = linalg.matmul ins(%m, %m : memref, memref)
- outs(%t : tensor)
+ inits(%t : tensor)
-> tensor
return
}
@@ -327,7 +327,7 @@
{
%0 = tensor.empty(%arg0, %arg1) : tensor
// expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}}
- linalg.fill ins(%arg2 : f32) outs(%0 : tensor)
+ linalg.fill ins(%arg2 : f32) inits(%0 : tensor)
}
// -----
@@ -336,7 +336,7 @@
(%arg0 : memref, %arg1 : f32) -> tensor
{
// expected-error @+1 {{expected the number of results (1) to be equal to the number of output tensors (0)}}
- %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) -> tensor
+ %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : memref) -> tensor
return %0 : tensor
}
@@ -346,7 +346,7 @@
(%arg0 : tensor, %arg1 : f32) -> memref
{
// expected-error @+1 {{result #0 must be ranked tensor of any type values, but got 'memref'}}
- %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : tensor) -> memref
+ %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : tensor) -> memref
return %0 : memref
}
@@ -355,7 +355,7 @@
func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) {
// expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}}
linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>)
- outs(%arg2 :memref<2x4xf32>)
+ inits(%arg2 :memref<2x4xf32>)
return
}
@@ -366,7 +366,7 @@
linalg.conv_2d_nhwc_hwcf
{ dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins(%input, %filter : memref<1x3x4x2xf32>, memref<3x2x2x1xf32>)
- outs(%output : memref<1x2x3x1xf32>)
+ inits(%output : memref<1x2x3x1xf32>)
return
}
@@ -382,7 +382,7 @@
func.func @invalid_reverse(%A: memref<5xf32>, %B: memref<5xf32>) {
// expected-error @+1 {{unexpected result less than 0 at expression #0 in}}
- linalg.generic #attrs ins(%A: memref<5xf32>) outs(%B: memref<5xf32>) {
+ linalg.generic #attrs ins(%A: memref<5xf32>) inits(%B: memref<5xf32>) {
^bb0(%a: f32, %b: f32):
linalg.yield %a : f32
}
@@ -396,7 +396,7 @@
-> tensor<64xf32> {
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
// expected-error @+1{{'linalg.yield' op expected number of yield values (1) to match the number of operands of the enclosing LinalgOp (2)}}
@@ -413,7 +413,7 @@
// expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -429,7 +429,7 @@
// expected-error@+1{{'linalg.map' op expected element type of input 'f32' to match bbArg type 'f64'}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f64, %rhs_elem: f64) {
%0 = arith.addf %lhs_elem, %rhs_elem: f64
linalg.yield %0: f64
@@ -445,7 +445,7 @@
// expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to match shape of output (32)}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>)
- outs(%init:tensor<32xf32>)
+ inits(%init:tensor<32xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -461,7 +461,7 @@
// expected-error @+1 {{'linalg.reduce' op init dimensions [16, 64] doesn't match input dimensions after reduction [16, 32]}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [2]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -477,7 +477,7 @@
// expected-error @+1 {{'linalg.reduce' op dimensions for reduction should be in the range [0, 2].}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [3]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -493,7 +493,7 @@
// expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16xf32>)
+ inits(%init:tensor<16xf32>)
dimensions = [1, 1]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -509,7 +509,7 @@
// expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16xf32>)
+ inits(%init:tensor<16xf32>)
dimensions = [2, 1]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -525,7 +525,7 @@
// expected-error @+1 {{'linalg.reduce' op number of dimensions after reduction 1 doesn't match the init rank 2}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1, 2]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -543,7 +543,7 @@
// expected-error @+1{{'linalg.reduce' op mismatching number of operands and block arguments}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -561,7 +561,7 @@
// expected-error @+1{{'linalg.reduce' op input element type 'f32' does not match corresponding block argument type 'f64'}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
dimensions = [1]
(%in1: f32, %in2: f64, %out1: f32, %out2: f64) {
%0 = arith.addf %in1, %out1: f32
@@ -580,7 +580,7 @@
// expected-error @+1{{'linalg.reduce' op output element type 'f64' does not match corresponding block argument type 'f32'}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>)
dimensions = [1]
(%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
%0 = arith.addf %in1, %out1: f32
@@ -597,7 +597,7 @@
// expected-error @+1{{'linalg.reduce' op expects all inputs to have the same shapes. Shape at input-index 1 is not equal to the shape at input-index 0.}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<17x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
dimensions = [1]
(%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
%0 = arith.addf %in1, %out1: f32
@@ -615,7 +615,7 @@
// expected-error @+1{{'linalg.reduce' op expects all outputs to have the same shapes. Shape at output-index 1 is not equal to the shape at output-index 0.}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
dimensions = [1]
(%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
%0 = arith.addf %in1, %out1: f32
@@ -632,7 +632,7 @@
// expected-error @+1 {{'linalg.transpose' op permutation is not valid}}
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 1, 2]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -644,7 +644,7 @@
// expected-error @+1 {{'linalg.transpose' op dim(result, 0) = 32 doesn't match dim(input, permutation[0]) = 16}}
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [0, 1, 2]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -657,7 +657,7 @@
// expected-error @+1 {{'linalg.transpose' op size of permutation 2 does not match the argument rank 3}}
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 0]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -669,7 +669,7 @@
// expected-error @+1 {{'linalg.transpose' op input rank 2 does not match init rank 3}}
%transpose = linalg.transpose
ins(%input:tensor<16x32xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 0, 2]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -682,7 +682,7 @@
// expected-error @+1 {{'linalg.broadcast' op input rank plus added dimensions does not match init rank. }}
%bcast = linalg.broadcast
ins(%input:tensor<4x16xf32>)
- outs(%init:tensor<4x8x16xf32>)
+ inits(%init:tensor<4x8x16xf32>)
dimensions = [1, 2]
func.return %bcast : tensor<4x8x16xf32>
}
@@ -695,7 +695,7 @@
// expected-error @+1 {{'linalg.broadcast' op dimension 0 is out of range. expected range: [0, 2], got: 5}}
%bcast = linalg.broadcast
ins(%input:tensor<4x16xf32>)
- outs(%init:tensor<4x8x16xf32>)
+ inits(%init:tensor<4x8x16xf32>)
dimensions = [5]
func.return %bcast : tensor<4x8x16xf32>
}
@@ -708,7 +708,7 @@
// expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 4, init: 5}}
%bcast = linalg.broadcast
ins(%input:tensor<4x16xf32>)
- outs(%init:tensor<5x8x16xf32>)
+ inits(%init:tensor<5x8x16xf32>)
dimensions = [1]
func.return %bcast : tensor<5x8x16xf32>
}
@@ -721,7 +721,7 @@
// expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 1, init: 4}}
%bcast = linalg.broadcast
ins(%input:tensor<1x16xf32>)
- outs(%init:tensor<4x?x16xf32>)
+ inits(%init:tensor<4x?x16xf32>)
dimensions = [1]
func.return %bcast : tensor<4x?x16xf32>
}
diff --git a/mlir/test/Dialect/Linalg/library-calls.mlir b/mlir/test/Dialect/Linalg/library-calls.mlir
--- a/mlir/test/Dialect/Linalg/library-calls.mlir
+++ b/mlir/test/Dialect/Linalg/library-calls.mlir
@@ -14,11 +14,11 @@
%C = memref.alloc(%x, %y) : memref
// CHECK: call @linalg_fill_f32_viewsxsxf32({{.*}}) : (f32, memref)
- linalg.fill ins(%f0 : f32) outs(%C : memref)
+ linalg.fill ins(%f0 : f32) inits(%C : memref)
// CHECK: call @linalg_matmul_viewsxsxf32_viewsxsxf32_viewsxsxf32({{.*}}) : (memref, memref, memref) -> ()
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return %C : memref
}
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -15,7 +15,7 @@
%B = memref.view %arg0[%c0][%K, %N] : memref to memref
%C = memref.view %arg0[%c0][%M, %N] : memref to memref
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return
}
// CHECK-LABEL: func @matmul(%{{.*}}: memref,
@@ -60,7 +60,7 @@
%3 = memref.view %arg0[%c0][%M] : memref to memref
%4 = memref.view %arg0[%c0][%N] : memref to memref
linalg.matvec ins(%2, %3: memref, memref)
- outs(%4 : memref)
+ inits(%4 : memref)
return
}
// CHECK-LABEL: func @matvec(%{{.*}}: memref,
@@ -101,7 +101,7 @@
%2 = memref.view %arg0[%c0][%M] : memref to memref
%3 = memref.view %arg0[%c0][] : memref to memref
linalg.dot ins(%1, %2 : memref, memref)
- outs(%3 : memref)
+ inits(%3 : memref)
return
}
// CHECK-LABEL: func @dot(%{{.*}}: memref,
@@ -135,7 +135,7 @@
%arg3: memref) {
// Verifies that we use the correct arith operations for integers.
linalg.dot ins(%arg0, %arg1 : memref, memref)
- outs(%arg3 : memref)
+ inits(%arg3 : memref)
return
}
// CHECK-LABEL: func @dot_int(
@@ -148,7 +148,7 @@
%arg3: memref) {
// Verifies that we use the correct (saturating) arith operations for booleans.
linalg.dot ins(%arg0, %arg1 : memref, memref)
- outs(%arg3 : memref)
+ inits(%arg3 : memref)
return
}
// CHECK-LABEL: func @dot_bool(
@@ -160,7 +160,7 @@
func.func @dot_view(%arg0: memref>, %arg1: memref>, %arg2: memref) {
linalg.dot ins(%arg0, %arg1 : memref>, memref>)
- outs(%arg2: memref)
+ inits(%arg2: memref)
return
}
// CHECK-LABEL: func @dot_view(
@@ -186,7 +186,7 @@
// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref
func.func @fill_view(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view(
@@ -200,7 +200,7 @@
// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref>
func.func @fill_view0(%arg0: memref, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref)
return
}
// CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) {
@@ -210,7 +210,7 @@
// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref
func.func @fill_view3(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view3(
@@ -230,7 +230,7 @@
iterator_types = ["parallel"],
indexing_maps = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>] }
ins(%arg0: memref>)
- outs(%arg1: memref>) {
+ inits(%arg1: memref>) {
^bb0(%a: f32, %b: f32):
linalg.yield %a : f32
}
@@ -264,7 +264,7 @@
func.func @generic_region(%arg0: memref>, %arg1: memref>, %arg2: memref>) {
linalg.generic #trait2
ins(%arg0: memref>)
- outs(%arg1, %arg2 : memref>,
+ inits(%arg1, %arg2 : memref>,
memref>) {
^bb0(%a: f32, %b: f32, %c: f32):
%d = arith.mulf %a, %b : f32
@@ -309,7 +309,7 @@
%arg2: memref>) {
linalg.generic #trait4
ins(%arg0 : memref>)
- outs(%arg1, %arg2 : memref>,
+ inits(%arg1, %arg2 : memref>,
memref>) {
^bb0(%a: f32, %b: f32, %c: f32):
%i = linalg.index 0 : index
@@ -377,7 +377,7 @@
{
linalg.generic #trait_broadcast
ins(%arg0 : memref)
- outs(%arg1 : memref<3x4xf32>) {
+ inits(%arg1 : memref<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
}
@@ -403,7 +403,7 @@
{
linalg.generic #trait_broadcast
ins(%arg0 : f32)
- outs(%arg1 : memref<3x4xf32>) {
+ inits(%arg1 : memref<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
}
@@ -427,7 +427,7 @@
{
linalg.generic #trait_broadcast
ins(%arg0 : memref)
- outs(%arg1 : memref<3x4xi32>) {
+ inits(%arg1 : memref<3x4xi32>) {
^bb(%a: i32, %b: i32) :
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -477,7 +477,7 @@
{
linalg.generic #trait_reduce_1D
ins(%arg0 : memref)
- outs(%arg1 : memref) {
+ inits(%arg1 : memref) {
^bb(%a: f32, %b: f32) :
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
@@ -523,7 +523,7 @@
{
linalg.generic #trait_reduce_init_1D
ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref) {
+ inits(%arg2 : memref) {
^bb(%a: f32, %b: f32, %c: f32) :
%i = linalg.index 0 : index
%0 = arith.constant 0 : index
@@ -567,7 +567,7 @@
}
func.func @generic_const_init(%arg0: memref) {
%cst = arith.constant 1.0 : f32
- linalg.generic #trait_const_fill outs(%arg0 : memref) {
+ linalg.generic #trait_const_fill inits(%arg0 : memref) {
^bb0(%arg1: f32):
linalg.yield %cst : f32
}
@@ -601,7 +601,7 @@
{
linalg.generic #scalar_trait
ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref) {
+ inits(%arg2 : memref) {
^bb(%a : f32, %b : f32, %c : f32) :
%result = scf.if %arg3 -> (f32) {
scf.yield %a : f32
@@ -643,7 +643,7 @@
//----------------------------------------------------------------------------//
func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) {
linalg.batch_matmul ins(%A, %B : memref, memref)
- outs(%C : memref)
+ inits(%C : memref)
return
}
// CHECK-LABEL: @named_batch_matmul
@@ -685,7 +685,7 @@
func.func @conv1d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () {
linalg.conv_1d ins(%in, %filter : memref, memref)
- outs(%out : memref)
+ inits(%out : memref)
return
}
@@ -728,7 +728,7 @@
func.func @conv2d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () {
linalg.conv_2d ins(%in, %filter : memref, memref)
- outs(%out: memref)
+ inits(%out: memref)
return
}
// CHECK-LABEL: @conv2d_no_symbols
@@ -781,7 +781,7 @@
func.func @conv3d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () {
linalg.conv_3d ins(%in, %filter : memref, memref)
- outs(%out : memref)
+ inits(%out : memref)
return
}
@@ -856,7 +856,7 @@
iterator_types = ["parallel"],
indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]}
ins(%0: memref>)
- outs(%1: memref>) {
+ inits(%1: memref>) {
^bb0(%a: i32, %b: i32):
linalg.yield %a : i32
}
diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
@@ -57,7 +57,7 @@
// CHECK: %[[R2c:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP4]], #[[$MAP5]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) outs(%{{.*}} : tensor<1x32x32x1xf32>)
+// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) inits(%{{.*}} : tensor<1x32x32x1xf32>)
// CHECK: ^bb0(%[[VAL:.+]]: f32, %{{.*}}: f32)
// CHECK: linalg.yield %[[VAL]] : f32
// CHECK: return %[[R2c:.+]]
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -34,7 +34,7 @@
iterator_types = ["parallel", "parallel"]
}
ins(%arg0: tensor<10x34xf32>)
- outs(%arg1: tensor<10x34xf32>) {
+ inits(%arg1: tensor<10x34xf32>) {
^bb0(%0: f32, %1: f32):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -61,7 +61,7 @@
// CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
// CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
- // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
+ // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) inits(%[[OUTSLICE_2]] : tensor<2x8xf32>)
// CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
// CHECK: scf.yield %[[RESPARTIAL]]
@@ -135,7 +135,7 @@
iterator_types = ["parallel", "parallel"]
}
ins(%arg0: tensor<10x34xf32>)
- outs(%arg1: tensor<10x34xf32>) {
+ inits(%arg1: tensor<10x34xf32>) {
^bb0(%0: f32, %1: f32):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -154,7 +154,7 @@
// CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
// CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
- // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
+ // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) inits(%[[OUTSLICE_2]] : tensor<2x8xf32>)
// CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
// CHECK: scf.yield %[[RESPARTIAL]]
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -4,11 +4,11 @@
func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<1x10x8x8xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
// CHECK: depthwise_conv_1d_nwc_wcm
%0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8x8xf32>)
- outs(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
+ inits(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
return %0 : tensor<1x10x8x8xf32>
}
@@ -18,11 +18,11 @@
func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<1x10x8xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
// CHECK: depthwise_conv_1d_nwc_wc
%0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8xf32>)
- outs(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
+ inits(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
return %0 : tensor<1x10x8xf32>
}
@@ -32,15 +32,15 @@
func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x3x4x2x3xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : tensor<2x3x4x2x3xf32>)
%0 = linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
- outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+ inits(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
return %0 : tensor<2x3x4x2x3xf32>
}
@@ -49,11 +49,11 @@
// CHECK: linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>)
linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
- outs(%output : memref<2x3x4x2x3xf32>)
+ inits(%output : memref<2x3x4x2x3xf32>)
return
}
@@ -63,10 +63,10 @@
// CHECK: %{{.+}} = linalg.depthwise_conv_1d_nw
// CHECK-SAME: {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
+ // CHECK-SAME: inits(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
%0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
ins(%input, %filter: tensor<1x113x96xf32>, tensor<3x96xf32>)
- outs(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
+ inits(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
return %0: tensor<1x56x96xf32>
}
@@ -76,10 +76,10 @@
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
+ // CHECK-SAME: inits(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
%0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
- outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
+ inits(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
return %0: tensor<1x56x56x96xf32>
}
@@ -88,10 +88,10 @@
// CHECK: linalg.depthwise_conv_2d_nhwc_hwc
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>)
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -101,10 +101,10 @@
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nchw_chw
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
+ // CHECK-SAME: inits(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
%0 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
- outs(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
+ inits(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
return %0: tensor<1x96x56x56xf32>
}
@@ -113,25 +113,25 @@
// CHECK: linalg.depthwise_conv_2d_nchw_chw
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x96x113x113xf32>, memref<96x3x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<1x96x56x56xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<1x96x56x56xf32>)
linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: memref<1x96x113x113xf32>, memref<96x3x3xf32>)
- outs(%output: memref<1x96x56x56xf32>)
+ inits(%output: memref<1x96x56x56xf32>)
return
}
func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x6x7x2x3xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<2x6x7x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : tensor<2x6x7x2x3xf32>)
%0 = linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
- outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
+ inits(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
return %0 : tensor<2x6x7x2x3xf32>
}
@@ -140,11 +140,11 @@
// CHECK: linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<2x6x7x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<2x6x7x2x3xf32>)
linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>)
- outs(%output : memref<2x6x7x2x3xf32>)
+ inits(%output : memref<2x6x7x2x3xf32>)
return
}
@@ -157,7 +157,7 @@
// CHECK-NOT: dilations =
linalg.depthwise_conv_2d_nhwc_hwc
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -167,7 +167,7 @@
// expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}}
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>}
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -177,7 +177,7 @@
// expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}}
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> }
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -187,11 +187,11 @@
func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x3x13x4x6x6xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwcm
%0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6x6xf32>)
- outs(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
+ inits(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
return %0 : tensor<2x3x13x4x6x6xf32>
}
@@ -201,11 +201,11 @@
func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x3x13x4x6xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwc
%0 = linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6xf32>)
- outs(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+ inits(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
return %0 : tensor<2x3x13x4x6xf32>
}
@@ -217,7 +217,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+ // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor
%0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: tensor, tensor)
@@ -233,7 +233,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
- // CHECK-SAME: outs(%{{.+}} : memref)
+ // CHECK-SAME: inits(%{{.+}} : memref)
linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: memref, memref)
@@ -249,7 +249,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+ // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor
%0 = linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: tensor, tensor)
@@ -265,7 +265,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
- // CHECK-SAME: outs(%{{.+}} : memref)
+ // CHECK-SAME: inits(%{{.+}} : memref)
linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: memref, memref)
@@ -281,7 +281,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<2xi64>
// CHECK-SAME: strides = dense<1> : tensor<2xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+ // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor
%0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor)
@@ -297,7 +297,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<2xi64>
// CHECK-SAME: strides = dense<1> : tensor<2xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+
// CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor) @@ -313,7 +313,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor) @@ -329,7 +329,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor<64x3x3x32xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor<64x3x3x32xf32>) @@ -345,7 +345,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) + // CHECK-SAME: inits(%{{.+}} : memref) linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: memref, memref) @@ -361,7 +361,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) + // CHECK-SAME: inits(%{{.+}} : memref) linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: memref, memref) @@ -377,7 +377,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: tensor, tensor) @@ -393,7 +393,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) + // CHECK-SAME: inits(%{{.+}} : memref) linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: memref, memref) @@ -408,15 +408,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> { %fake = tensor.empty() : tensor<3x3xf32> %init = tensor.empty() : tensor<1x2x2x1xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, 
%fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>) - outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> return %res : tensor<1x2x2x1xf32> } @@ -427,15 +427,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> func.func @pooling_nwc_sum_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> { %fake = tensor.empty() : tensor<3xf32> %init = tensor.empty() : tensor<1x2x1xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> %res = linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>) - outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + inits(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> return %res : tensor<1x2x1xf32> } @@ -446,11 +446,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xf32>) func.func @pooling_nhwc_sum(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) { linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>) - outs(%output: memref<1x2x2x1xf32>) + inits(%output: memref<1x2x2x1xf32>) return } @@ -461,11 +461,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xf32>) func.func @pooling_nwc_sum(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) { linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>) - outs(%output: memref<1x2x1xf32>) + inits(%output: memref<1x2x1xf32>) return } @@ -476,15 +476,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> { %fake = tensor.empty() : tensor<3x3xf32> %init = tensor.empty() : tensor<1x1x2x2xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> %res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>) - outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> + inits(%fill: tensor<1x1x2x2xf32>) -> 
tensor<1x1x2x2xf32> return %res : tensor<1x1x2x2xf32> } @@ -495,15 +495,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> func.func @pooling_ncw_sum_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> { %fake = tensor.empty() : tensor<3xf32> %init = tensor.empty() : tensor<1x1x2xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> %res = linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>) - outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + inits(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> return %res : tensor<1x1x2xf32> } @@ -514,11 +514,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x1x4x4xf32>, memref<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x1x2x2xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x1x2x2xf32>) func.func @pooling_nchw_sum(%input: memref<1x1x4x4xf32>, %fake: memref<3x3xf32>, %output: memref<1x1x2x2xf32>) { linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: memref<1x1x4x4xf32>, memref<3x3xf32>) - outs(%output: memref<1x1x2x2xf32>) + inits(%output: memref<1x1x2x2xf32>) return } @@ -529,11 +529,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x1x4xf32>, memref<3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x1x2xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x1x2xf32>) func.func @pooling_ncw_sum(%input: memref<1x1x4xf32>, %fake: memref<3xf32>, %output: memref<1x1x2xf32>) { linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: memref<1x1x4xf32>, memref<3xf32>) - outs(%output: memref<1x1x2xf32>) + inits(%output: memref<1x1x2xf32>) return } @@ -544,15 +544,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> { %fake = tensor.empty() : tensor<3x3xf32> %init = tensor.empty() : tensor<1x2x2x1xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>) - outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> return %res : tensor<1x2x2x1xf32> } @@ -562,15 +562,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = 
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
 func.func @pooling_nwc_max_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    inits(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   return %res : tensor<1x2x1xf32>
 }
@@ -580,16 +580,16 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    inits(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   return %res : tensor<1x1x2x2xf32>
 }
@@ -599,16 +599,16 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
 func.func @pooling_ncw_max_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x1x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
   %res = linalg.pooling_ncw_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+    inits(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
   return %res : tensor<1x1x2xf32>
 }
@@ -619,11 +619,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_max(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -634,11 +634,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xf32>)
 func.func @pooling_nwc_max(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>)
-    outs(%output: memref<1x2x1xf32>)
+    inits(%output: memref<1x2x1xf32>)
   return
 }
@@ -649,15 +649,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
 func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
   %fake = tensor.empty() : tensor<3x3xi8>
   %init = tensor.empty() : tensor<1x2x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) inits(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-    outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+    inits(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   return %res : tensor<1x2x2x1xi8>
 }
@@ -668,15 +668,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi8>, tensor<3xi8>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
 func.func @pooling_nwc_i8_max_tensor(%input: tensor<1x4x1xi8>) -> tensor<1x2x1xi8> {
   %fake = tensor.empty() : tensor<3xi8>
   %init = tensor.empty() : tensor<1x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) inits(%init : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi8>, tensor<3xi8>)
-    outs(%fill: tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+    inits(%fill: tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
   return %res : tensor<1x2x1xi8>
 }
@@ -687,11 +687,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi8>, memref<3x3xi8>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xi8>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xi8>)
 func.func @pooling_nhwc_i8_max(%input: memref<1x4x4x1xi8>, %fake: memref<3x3xi8>, %output: memref<1x2x2x1xi8>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi8>, memref<3x3xi8>)
-    outs(%output: memref<1x2x2x1xi8>)
+    inits(%output: memref<1x2x2x1xi8>)
   return
 }
@@ -702,11 +702,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi8>, memref<3xi8>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi8>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xi8>)
 func.func @pooling_nwc_i8_max(%input: memref<1x4x1xi8>, %fake: memref<3xi8>, %output: memref<1x2x1xi8>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xi8>, memref<3xi8>)
-    outs(%output: memref<1x2x1xi8>)
+    inits(%output: memref<1x2x1xi8>)
   return
 }
@@ -717,15 +717,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
 func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
   %fake = tensor.empty() : tensor<3x3xi16>
   %init = tensor.empty() : tensor<1x2x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) inits(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-    outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+    inits(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   return %res : tensor<1x2x2x1xi16>
 }
@@ -736,15 +736,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi16>, tensor<3xi16>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
 func.func @pooling_nwc_i16_max_tensor(%input: tensor<1x4x1xi16>) -> tensor<1x2x1xi16> {
   %fake = tensor.empty() : tensor<3xi16>
   %init = tensor.empty() : tensor<1x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) inits(%init : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi16>, tensor<3xi16>)
-    outs(%fill: tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+    inits(%fill: tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
   return %res : tensor<1x2x1xi16>
 }
@@ -755,11 +755,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi16>, memref<3x3xi16>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xi16>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xi16>)
 func.func @pooling_nhwc_i16_max(%input: memref<1x4x4x1xi16>, %fake: memref<3x3xi16>, %output: memref<1x2x2x1xi16>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi16>, memref<3x3xi16>)
-    outs(%output: memref<1x2x2x1xi16>)
+    inits(%output: memref<1x2x2x1xi16>)
   return
 }
@@ -770,11 +770,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi16>, memref<3xi16>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi16>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xi16>)
 func.func @pooling_nwc_i16_max(%input: memref<1x4x1xi16>, %fake: memref<3xi16>, %output: memref<1x2x1xi16>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xi16>, memref<3xi16>)
-    outs(%output: memref<1x2x1xi16>)
+    inits(%output: memref<1x2x1xi16>)
   return
 }
@@ -785,15 +785,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
 func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
   %fake = tensor.empty() : tensor<3x3xi32>
   %init = tensor.empty() : tensor<1x2x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) inits(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-    outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+    inits(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   return %res : tensor<1x2x2x1xi32>
 }
@@ -804,15 +804,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi32>, tensor<3xi32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
 func.func @pooling_nwc_i32_max_tensor(%input: tensor<1x4x1xi32>) -> tensor<1x2x1xi32> {
   %fake = tensor.empty() : tensor<3xi32>
  %init = tensor.empty() : tensor<1x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) inits(%init : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi32>, tensor<3xi32>)
-    outs(%fill: tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+    inits(%fill: tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
   return %res : tensor<1x2x1xi32>
 }
@@ -823,11 +823,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi32>, memref<3x3xi32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xi32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xi32>)
 func.func @pooling_nhwc_i32_max(%input: memref<1x4x4x1xi32>, %fake: memref<3x3xi32>, %output: memref<1x2x2x1xi32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi32>, memref<3x3xi32>)
-    outs(%output: memref<1x2x2x1xi32>)
+    inits(%output: memref<1x2x2x1xi32>)
   return
 }
@@ -838,11 +838,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi32>, memref<3xi32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xi32>)
 func.func @pooling_nwc_i32_max(%input: memref<1x4x1xi32>, %fake: memref<3xi32>, %output: memref<1x2x1xi32>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xi32>, memref<3xi32>)
-    outs(%output: memref<1x2x1xi32>)
+    inits(%output: memref<1x2x1xi32>)
   return
 }
@@ -854,15 +854,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -873,15 +873,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
 func.func @pooling_nwc_min_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   %res = linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    inits(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   return %res : tensor<1x2x1xf32>
 }
@@ -892,11 +892,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_min(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -907,11 +907,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xf32>)
 func.func @pooling_nwc_min(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) {
   linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>)
-    outs(%output: memref<1x2x1xf32>)
+    inits(%output: memref<1x2x1xf32>)
   return
 }
@@ -922,15 +922,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -941,11 +941,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_sum(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -956,15 +956,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -975,11 +975,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_max(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -990,15 +990,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -1009,11 +1009,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_min(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -1056,8 +1056,8 @@
 func.func @batch_reduce_matmul(%arg0: tensor<8x128x256xf32>, %arg1: tensor<8x256x512xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> {
   // CHECK: %{{.+}} = linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<8x128x256xf32>, tensor<8x256x512xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
-  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) outs(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
+  // CHECK-SAME: inits(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
+  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) inits(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
   return %0: tensor<128x512xf32>
 }
@@ -1066,8 +1066,8 @@
 func.func @batch_reduce_matmul(%arg0: memref, %arg1: memref, %arg2: memref) {
   // CHECK: linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME: outs(%{{.+}} : memref)
-  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) outs(%arg2: memref)
+  // CHECK-SAME: inits(%{{.+}} : memref)
+  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) inits(%arg2: memref)
   return
 }
@@ -1076,9 +1076,9 @@
 // CHECK-LABEL: func @matmul_transpose_b
 // CHECK: linalg.matmul_transpose_b
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<3x5xf32>, memref<7x5xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<3x7xf32>)
 func.func @matmul_transpose_b(%arg0: memref<3x5xf32>, %arg1: memref<7x5xf32>, %arg2: memref<3x7xf32>) {
-  linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) outs(%arg2: memref<3x7xf32>)
+  linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) inits(%arg2: memref<3x7xf32>)
   return
 }
@@ -1087,8 +1087,8 @@
 // CHECK-LABEL: func @batchmatmul_transpose_b
 // CHECK: linalg.batch_matmul_transpose_b
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x3x5xf32>, memref<2x7x5xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<2x3x7xf32>)
 func.func @batchmatmul_transpose_b(%arg0: memref<2x3x5xf32>, %arg1: memref<2x7x5xf32>, %arg2: memref<2x3x7xf32>) {
-  linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) outs(%arg2: memref<2x3x7xf32>)
+  linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) inits(%arg2: memref<2x3x7xf32>)
   return
 }
diff --git a/mlir/test/Dialect/Linalg/namedop_conversion.mlir b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
--- a/mlir/test/Dialect/Linalg/namedop_conversion.mlir
+++ b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
@@ -4,9 +4,9 @@
 func.func @depthwise_conv(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
@@ -17,8 +17,8 @@
 func.func @depthwise_conv_q(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3 : i32, %arg4 : i32) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -18,15 +18,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -45,15 +45,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -72,15 +72,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -99,15 +99,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -126,15 +126,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -153,15 +153,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
{__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -180,15 +180,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -207,15 +207,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -234,15 +234,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : 
f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -261,15 +261,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -288,15 +288,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -315,15 +315,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: 
{__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -344,13 +344,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -371,13 +371,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -397,13 +397,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> 
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -424,13 +424,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -453,11 +453,11 @@
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -505,13 +505,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -532,13 +532,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -559,13 +559,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -586,13 +586,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -615,11 +615,11 @@
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -642,10 +642,10 @@
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -22,8 +22,8 @@
/// Inplaceable, no alloc
// CHECK-NOT: alloc
- // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[A]] : memref>)
- %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[A]] : memref>)
+ %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
// CHECK: return
// CHECK-NOT: tensor
@@ -45,8 +45,8 @@
// CHECK: %[[D0:.*]] = memref.dim %[[A]], {{.*}} : memref>
// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref
- // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[ALLOC]] : memref)
- %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[ALLOC]] : memref)
+ %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
// CHECK-NOT: dealloc
// CHECK: return %[[ALLOC]] : memref
@@ -67,14 +67,14 @@
/// Cross-op multiple uses of %A, the first op which has interfering reads must alloc.
// CHECK: %[[ALLOC:.*]] = memref.alloc
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[ALLOC]]
- %f = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[ALLOC]]
+ %f = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
/// The second op has no interfering reads and can reuse.
// CHECK-NOT: alloc
- // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) outs(%[[A]]
+ // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) inits(%[[A]]
%r = linalg.matmul ins(%f, %f: tensor, tensor)
- outs(%A: tensor)
+ inits(%A: tensor)
-> tensor
// CHECK: memref.dealloc %[[ALLOC]]
@@ -91,7 +91,7 @@
/// Within op multiple uses of %A, must alloc.
// CHECK: alloc
%r = linalg.matmul ins(%A, %A: tensor, tensor)
- outs(%A: tensor)
+ inits(%A: tensor)
-> tensor
// CHECK-NOT: dealloc
return %r: tensor
@@ -181,8 +181,8 @@
tensor<128x192xf32> to tensor<8x16xf32>
// linalg.fill is inplace.
- // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
- %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
+ // CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[C_SLICE]]
+ %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
// CHECK: scf.for %[[K:.*]] =
%6 = scf.for %arg7 = %c0 to %c256 step %c32 iter_args(%arg8 = %5) -> (tensor<8x16xf32>) {
@@ -192,9 +192,9 @@
tensor<256x16xf32> to tensor<32x16xf32>
// linalg.matmul is inplace as well as the enclosing scf.for.
- // CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
+ // CHECK: linalg.matmul ins({{.*}} inits(%[[C_SLICE]]
%10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>)
- outs(%arg8 : tensor<8x16xf32>)
+ inits(%arg8 : tensor<8x16xf32>)
-> tensor<8x16xf32>
scf.yield %10 : tensor<8x16xf32>
}
@@ -231,7 +231,7 @@
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
- %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -250,7 +250,7 @@
indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg1 : tensor) outs(%arg2 : tensor) {
+ ins(%arg1 : tensor) inits(%arg2 : tensor) {
^bb0(%arg3: i32, %arg4 : f32):
%iv1 = linalg.index 1 : index
%1 = arith.index_cast %arg3: i32 to index
@@ -266,7 +266,7 @@
// CHECK-SAME: ) {
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG1]] :
-// CHECK-SAME: outs(%[[ARG2]] :
+// CHECK-SAME: inits(%[[ARG2]] :
// CHECK: %[[YIELD:.+]] = memref.load %[[ARG0]]
// CHECK: linalg.yield %[[YIELD]]
@@ -281,14 +281,14 @@
%s1: index, %s2: index, %cst: f32) -> tensor {
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
+ // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) inits(%[[t3]] : {{.*}})
%r = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d1)>,
affine_map<(d0, d1)-> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%t1, %t2 : tensor, tensor)
- outs(%t3 : tensor) {
+ inits(%t3 : tensor) {
^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
%add = arith.addf %arg0, %arg1 : f32
linalg.yield %add : f32
@@ -316,14 +316,14 @@
// Make sure that a copy is inserted here.
// CHECK: %[[ALLOC:.*]] = memref.alloc
// CHECK: memref.copy %[[t0]], %[[ALLOC]]
- // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
+ // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
- %r0 =linalg.generic #trait outs (%t0 : tensor) {
+ %r0 = linalg.generic #trait inits(%t0 : tensor) {
^bb(%0: f32) :
%a = arith.addf %cst, %0 : f32
linalg.yield %a : f32
} -> (tensor)
- // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
+ // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
- %r1 = linalg.generic #trait outs (%r0 : tensor) {
+ %r1 = linalg.generic #trait inits(%r0 : tensor) {
^bb(%0: f32) :
linalg.yield %cst : f32
} -> (tensor)
@@ -343,7 +343,7 @@
// CHECK: linalg.map { arith.addf } ins(%[[LHS]], %[[RHS]] : memref<64xf32
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -360,7 +360,7 @@
// CHECK: linalg.reduce { arith.addf } ins(%[[INPUT]] : memref<16x32x64xf32
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in: f32
@@ -378,7 +378,7 @@
// CHECK: linalg.transpose ins(%[[ARG0]] : memref<16x32x64xf32
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 2, 0]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -391,7 +391,7 @@
%init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
%bcast = linalg.broadcast
ins(%input:tensor<8x32xf32>)
- outs(%init:tensor<8x16x32xf32>)
+ inits(%init:tensor<8x16x32xf32>)
dimensions = [1]
func.return %bcast : tensor<8x16x32xf32>
}
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -10,7 +10,7 @@
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%init : tensor) {
+ ins(%arg0 : tensor) inits(%init : tensor) {
^bb0(%arg6 : f32, %arg7 : f32):
%1 = arith.mulf %arg6, %arg6 : f32
linalg.yield %1 : f32
@@ -38,13 +38,13 @@
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}inits(%[[INIT]]
// CHECK-DAG: %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
// CHECK-SAME: [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
// CHECK: %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME: outs(%[[SLICE]] : tensor)
+// CHECK-SAME: inits(%[[SLICE]] : tensor)
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
// CHECK-SAME: [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
// CHECK: return %[[RESULT]]
@@ -59,7 +59,7 @@
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%init : tensor<42x?xf32>) {
+ ins(%arg0 : tensor) inits(%init : tensor<42x?xf32>) {
^bb0(%arg4 : f32, %arg5 : f32):
%1 = arith.mulf %arg4, %arg4 : f32
linalg.yield %1 : f32
@@ -82,12 +82,12 @@
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]], %[[SOURCE_D1]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D1]])
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}inits(%[[INIT]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
// CHECK-SAME: [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
// CHECK: %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME: outs(%[[SLICE]] : tensor<42x?xf32>)
+// CHECK-SAME: inits(%[[SLICE]] : tensor<42x?xf32>)
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
// CHECK-SAME: [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
// CHECK: return %[[RESULT]]
diff --git a/mlir/test/Dialect/Linalg/parallel-loops.mlir
--- a/mlir/test/Dialect/Linalg/parallel-loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel-loops.mlir
@@ -8,7 +8,7 @@
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
- outs(%sum : memref<2x2xf32>) {
+ inits(%sum : memref<2x2xf32>) {
^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
%0 = arith.addf %lhs_in, %rhs_in : f32
linalg.yield %0 : f32
@@ -41,7 +41,7 @@
func.func @lower_outer_parallel(%A: memref, %B: memref) {
linalg.generic #trait
ins(%A : memref)
- outs(%B : memref) {
+ inits(%B : memref) {
^bb0(%a: f32, %b: f32):
linalg.yield %a: f32
}
@@ -74,7 +74,7 @@
func.func @lower_mixed_parallel(%A: memref, %B: memref) {
linalg.generic #trait
ins(%A : memref)
- outs(%B : memref) {
+ inits(%B : memref) {
^bb0(%a: f32, %b: f32):
linalg.yield %a: f32
}
diff --git a/mlir/test/Dialect/Linalg/promote.mlir
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -25,7 +25,7 @@
linalg.matmul
ins(%11, %14: memref>, memref>)
- outs(%17: memref>)
+ inits(%17: memref>)
}
}
}
@@ -56,7 +56,7 @@
// CHECK: memref.copy %[[vB]], %[[partialB]] : memref> to memref>
// CHECK: memref.copy %[[vC]], %[[partialC]] : memref> to memref>
//
-// CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]]
+// CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} inits(%[[partialC]]
//
// CHECK: memref.copy %[[partialC]], %[[vC]] :
// CHECK: memref> to
@@ -95,7 +95,7 @@
linalg.matmul
ins(%11, %14: memref>, memref>)
- outs(%17: memref>)
+ inits(%17: memref>)
}
}
}
@@ -126,7 +126,7 @@
// CHECK: memref.copy %[[vB_f64]], %[[partialB_f64]] : memref> to memref>
// CHECK: memref.copy %[[vC_f64]], %[[partialC_f64]] : memref> to memref>
//
-// CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]]
+// CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} inits(%[[partialC_f64]]
//
// CHECK: memref.copy %[[partialC_f64]], %[[vC_f64]] :
// CHECK: memref> to
@@ -146,7 +146,7 @@
func.func @gemm_shared(%a : memref, %b : memref, %c : memref) {
linalg.matmul ins(%a, %b: memref, memref)
- outs(%c: memref)
+ inits(%c: memref)
return
}
@@ -177,7 +177,7 @@
// CHECK-NEXT: memref.copy %[[subview_B]], %[[shared_B]] : memref> to memref, #gpu.address_space>
// CHECK-NEXT: gpu.barrier
-// CHECK: linalg.matmul ins(%[[shared_A]], %[[shared_B]]{{.*}} outs(%[[subview_C]]
+// CHECK: linalg.matmul ins(%[[shared_A]], %[[shared_B]]{{.*}} inits(%[[subview_C]]
transform.sequence failures(propagate) {
@@ -193,7 +193,7 @@
func.func @gemm_private(%a : memref, %b : memref, %c : memref) {
linalg.matmul ins(%a, %b: memref, memref)
- outs(%c: memref)
+ inits(%c: memref)
return
}
@@ -219,7 +219,7 @@
// CHECK-NEXT: memref.copy %[[subview_A]], %[[private_A]] : memref> to memref, #gpu.address_space>
// CHECK-NEXT: memref.copy %[[subview_B]], %[[private_B]] : memref> to memref, #gpu.address_space>
-// CHECK: linalg.matmul ins(%[[private_A]], %[[private_B]]{{.*}} outs(%[[subview_C]]
+// CHECK: linalg.matmul ins(%[[private_A]], %[[private_B]]{{.*}} inits(%[[subview_C]]
transform.sequence failures(propagate) {
@@ -258,9 +258,9 @@
// CHECK-COUNT-3: memref.copy
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[a_pro_subview]], %[[b_pro_subview]]
- // CHECK-SAME: outs(%[[c_pro_subview]]
+ // CHECK-SAME: inits(%[[c_pro_subview]]
- linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) outs(%9 : memref>) {
+ linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) inits(%9 : memref>) {
^bb0(%arg9: f32, %arg10: f32, %arg11: f32):
%15 = arith.mulf %arg9, %arg10 : f32
%16 = arith.addf %arg11, %15 : f32
diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir
--- a/mlir/test/Dialect/Linalg/promotion_options.mlir
+++ b/mlir/test/Dialect/Linalg/promotion_options.mlir
@@ -3,7 +3,7 @@
func.func @gemm(%a : memref, %b : memref, %c : memref) {
linalg.matmul ins(%a, %b: memref, memref)
- outs(%c: memref)
+ inits(%c: memref)
return
}
@@ -29,7 +29,7 @@
// CHECK: memref.copy %[[svA]], %[[svAA]]
// CHECK: memref.copy %[[svC]], %[[svCC]]
-// CHECK: linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} outs(%[[VC]]
+// CHECK: linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} inits(%[[VC]]
// CHECK: memref.copy %[[svCC]], %[[svC]]
// CHECK: memref.dealloc %[[tmpA]]
// CHECK: memref.dealloc %[[tmpC]]
diff --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -11,7 +11,7 @@
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%0, %arg1 : tensor, tensor)
- outs(%init : tensor) {
+ inits(%init : tensor) {
^bb0(%arg2 : f32, %arg3:f32, %arg4 : f32):
%2 = arith.addf %arg2, %arg3 : f32
linalg.yield %2 : f32
@@ -44,19 +44,19 @@
%fill = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- outs(%init : tensor) {
+ inits(%init : tensor) {
^bb0(%arg2: f32):
linalg.yield %cst : f32
} -> tensor
%0 = tensor.expand_shape %fill [[0, 1], [2]] : tensor into tensor<1x?x?xf32>
%1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
- outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
+ inits(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
return %1 : tensor<1x?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
// CHECK: func @control_consumer_reshape_fusion
// CHECK: %[[FILL:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP]]]
-// CHECK-SAME: outs(%{{.+}} : tensor<1x?x?xf32>)
+// CHECK-SAME: inits(%{{.+}} : tensor<1x?x?xf32>)
// CHECK: linalg.batch_matmul
-// CHECK-SAME: outs(%[[FILL]] : tensor<1x?x?xf32>)
+// CHECK-SAME: inits(%[[FILL]] : tensor<1x?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
@@ -14,7 +14,7 @@
indexing_maps = [#map0, #map1, #map2, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1, %arg2 : tensor, tensor, f32)
- outs(%arg1 : tensor) {
+ inits(%arg1 : tensor) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
@@ -38,7 +38,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP5]], #[[MAP6]], #[[MAP7]], #[[MAP6]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[ARG0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: %[[T4:.+]] = tensor.collapse_shape %[[T3]]
// CHECK-SAME: [0], [1], [2, 3]
// CHECK-SAME: tensor into tensor
@@ -57,7 +57,7 @@
indexing_maps = [#map0, #map0, #map1, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1, %arg2 : tensor, tensor, f32)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
@@ -87,7 +87,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: return %[[T3]] : tensor
@@ -102,7 +102,7 @@
affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%a, %b : tensor, tensor)
- outs(%a : tensor) {
+ inits(%a : tensor) {
^bb0(%arg0 : f32, %arg1: f32, %s: f32):
%1 = arith.addf %arg0, %arg1 : f32
linalg.yield %1 : f32
@@ -130,7 +130,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP8]], #[[MAP9]], #[[MAP10]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor<3x4x?x?x2x?xf32>, tensor<3x4x?x?xf32>)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: return %[[T3]] : tensor
// -----
@@ -147,7 +147,7 @@
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
- outs(%0 : tensor<264x4xf32>) {
+ inits(%0 : tensor<264x4xf32>) {
^bb0(%arg1: f32, %arg2: f32, %s: f32):
%2 = arith.mulf %arg1, %arg2 : f32
linalg.yield %2 : f32
@@ -173,7 +173,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP2]]]
// CHECK-SAME: ["parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[CST]] :
-// CHECK-SAME: outs(%[[T1]] : tensor<8x33x4xf32>)
+// CHECK-SAME: inits(%[[T1]] : tensor<8x33x4xf32>)
// CHECK: return %[[T2]] : tensor<8x33x4xf32>
// -----
@@ -190,7 +190,7 @@
indexing_maps = [#map0, #map1, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1 : tensor, tensor)
- outs(%0 : tensor) {
+ inits(%0 : tensor) {
^bb0(%arg3: i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -239,7 +239,7 @@
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%arg3: i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -288,7 +288,7 @@
affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%a, %b : tensor<210x6x4xi32>, tensor<210x4xi32>)
- outs(%shape : tensor<6x4x210xi32>) {
+ inits(%shape : tensor<6x4x210xi32>) {
^bb0(%arg3 : i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -329,7 +329,7 @@
// CHECK: %[[T4:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME: ins(%[[T1]], %[[T2]] : tensor<5x6x7x2x3x4xi32>, tensor<5x6x7x4xi32>)
-// CHECK-SAME: outs(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
+// CHECK-SAME: inits(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
// CHECK: ^{{.+}}(
// CHECK-SAME: %[[ARG8:[a-zA-Z0-9_]+]]: i32, %[[ARG9:[a-zA-Z0-9_]+]]: i32,
// CHECK-SAME: %[[ARG10:[a-zA-Z0-9_]+]]: i32)
@@ -362,7 +362,7 @@
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<264x?xi32>)
- outs(%shape : tensor<264x?x4xi32>) {
+ inits(%shape : tensor<264x?x4xi32>) {
^bb0(%arg1: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -418,7 +418,7 @@
indexing_maps = [#map0, #map0, #map1],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%arg3: f32, %arg4: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
linalg.yield %1 : f32
@@ -446,7 +446,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP4]], #[[MAP4]], #[[MAP5]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor, tensor)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: return %[[T3]] : tensor
// -----
@@ -459,7 +459,7 @@
%3 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- ins(%0 : tensor) outs(%2 : tensor) {
+ ins(%0 : tensor) inits(%2 : tensor) {
^bb0(%arg1 : f32, %arg2: f32):
%4 = arith.addf %arg1, %arg1 : f32
linalg.yield %4 : f32
@@ -484,7 +484,7 @@
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%0, %arg1 : tensor<2xi64>, tensor)
- outs(%1 : tensor<2xi64>) {
+ inits(%1 : tensor<2xi64>) {
^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
%3 = arith.addi %arg4, %arg5 : i64
linalg.yield %3 : i64
@@ -512,7 +512,7 @@
affine_map<(d0, d1, d2) -> (d2, d0, d1)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%a, %b : tensor, tensor)
- outs(%a, %a : tensor, tensor) {
+ inits(%a, %a : tensor, tensor) {
^bb0(%arg0 : f32, %arg1: f32, %s: f32, %t : f32):
%1 = arith.addf %arg0, %arg1 : f32
linalg.yield %1, %1 : f32, f32
@@ -537,7 +537,7 @@
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]]
// CHECK-SAME: ins(%[[RESHAPE0]], %[[RESHAPE1]] :
-// CHECK-SAME: outs(%[[RESHAPE2]], %[[RESHAPE3]] :
+// CHECK-SAME: inits(%[[RESHAPE2]], %[[RESHAPE3]] :
// CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1
// -----
@@ -551,7 +551,7 @@
indexing_maps = [#map0, #map0, #map0, #map1],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<512xf32>, tensor<512xf32>)
- outs(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
+ inits(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
^bb0(%arg4: f32, %arg5: f32, %arg6: f32, %arg7: f32):
%2 = arith.addf %arg4, %arg5 : f32
linalg.yield %2, %2 : f32, f32
@@ -571,5 +571,5 @@
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]], #[[MAP1]]]
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
-// CHECK-SAME: outs(%[[ARG2]], %[[OUTS]] :
+// CHECK-SAME: inits(%[[ARG2]], %[[OUTS]] :
// CHECK: return %[[GENERIC]]#1
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -54,7 +54,7 @@
affine_map<(d0, d1, d2) -> (d0 + d1, d1 - d0)>],
iterator_types = ["parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
@@ -92,7 +92,7 @@
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%0 : tensor) {
+ ins(%arg0 : tensor) inits(%0 : tensor) {
^bb0(%arg2: f32, %arg3: f32) :
linalg.yield %arg2 : f32
} -> tensor
@@ -111,7 +111,7 @@
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
%1 = tensor.dim %0, %c0 : tensor
%2 = tensor.dim %0, %c1 : tensor
%3 = linalg.generic
@@ -120,7 +120,7 @@
affine_map<(d0, d1, d2) -> (d0, d2)>],
iterator_types = ["parallel", "reduction", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%0 : tensor) {
+ inits(%0 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
%4 = arith.mulf %arg3, %arg4 : f32
%5 = arith.addf %4, %arg5 : f32
@@ -154,7 +154,7 @@
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%0 : tensor) {
+ ins(%arg0 : tensor) inits(%0 : tensor) {
^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> tensor
@@ -179,7 +179,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel"]
} ins(%arg_0 : tensor)
- outs(%arg_0, %arg_1 : tensor, tensor) {
+ inits(%arg_0, %arg_1 : tensor, tensor) {
^bb0(%in: f32, %out_0: f32, %out_1: f32):
linalg.yield %in, %in : f32, f32
} -> (tensor, tensor)
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -32,38 +32,38 @@
%arg3: memref) {
linalg.matmul ins(%arg0, %arg0 : memref>, memref>)
- outs(%arg0 : memref>)
+ inits(%arg0 : memref>)
linalg.matvec ins(%arg0, %arg1: memref>, memref>)
- outs(%arg2: memref>)
+ inits(%arg2: memref>)
linalg.dot ins(%arg1, %arg2: memref>, memref>)
- outs(%arg3: memref)
+ inits(%arg3: memref)
return
}
// CHECK-LABEL: func @ops(%
// CHECK: linalg.matmul
// CHECK-SAME: ins(%{{.*}}, %{{.*}} : memref>,
// CHECK-SAME: memref>)
-// CHECK-SAME: outs(%{{.*}} : memref>)
+// CHECK-SAME: inits(%{{.*}} : memref>)
// CHECK: linalg.matvec
// CHECK-SAME: ins(%{{.*}}, %{{.*}}: memref>,
// CHECK-SAME: memref>)
-// CHECK-SAME: outs(%{{.*}}: memref>)
+// CHECK-SAME: inits(%{{.*}}: memref>)
// CHECK: linalg.dot
// CHECK-SAME: ins(%{{.*}}, %{{.*}}: memref>,
// CHECK-SAME: memref>)
-// CHECK-SAME: outs(%{{.*}}: memref)
+// CHECK-SAME: inits(%{{.*}}: memref)
// -----
func.func @fill_view(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view(
// CHECK: %{{.*}}: memref>, %{{.*}}: f32) {
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)
// -----
@@ -79,12 +79,12 @@
func.func @fill_view3(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view3(
// CHECK: %{{.*}}: memref>, %{{.*}}: f32) {
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)
// -----
@@ -105,7 +105,7 @@
%cst = arith.constant 0.0 : f32
linalg.generic #trait_0
ins(%arg0, %cst : memref, strided<[?, 1], offset: ?>>, f32)
- outs(%arg1 : memref>)
+ inits(%arg1 : memref>)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
linalg.yield %1 : f32
@@ -118,7 +118,7 @@
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}}, {{.*}} : memref, strided<[?, 1], offset: ?>>, f32)
-// CHECK-SAME: outs({{.*}} : memref>)
+// CHECK-SAME: inits({{.*}} : memref>)
// CHECK-SAME: {foo = 1 : i64}
// -----
@@ -127,7 +127,7 @@
func.func @generic_without_inputs(%arg0 : memref) {
linalg.generic {indexing_maps = [#map0],
iterator_types = ["parallel", "parallel", "parallel"]}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb0(%arg3: f32):
%cst = arith.constant 0.000000e+00 : f32
linalg.yield %cst : f32
@@ -158,7 +158,7 @@
-> (tensor) {
%0 = linalg.generic #trait_1
ins(%arg0, %arg1 : tensor>, tensor)
- outs(%arg1 : tensor)
+ inits(%arg1 : tensor)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
%f0 = arith.constant 0.0 : f32
@@ -171,7 +171,7 @@
// CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}} : tensor>, tensor)
-// CHECK-SAME: outs({{.*}} : tensor)
+// CHECK-SAME: inits({{.*}} : tensor)
// CHECK-SAME: {foo = 1 : i64}
// CHECK: -> tensor
// CHECK: return {{.*}} : tensor
@@ -183,14 +183,14 @@
-> (tensor, tensor) {
%c0 = arith.constant 0 : index
%0 = tensor.empty() : tensor
- %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg2 : i32) inits(%0 : tensor) -> tensor
%2 = tensor.empty() : tensor
- %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor) -> tensor
+ %3 = linalg.fill ins(%arg2 : i32) inits(%2 : tensor) -> tensor
%4:2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
iterator_types = ["reduction"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%1, %3 : tensor, tensor) {
+ inits(%1, %3 : tensor, tensor) {
^bb0(%arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32):
%5 = arith.cmpi sge, %arg3, %arg5 : i32
%6 = arith.select %5, %arg3, %arg5 : i32
@@ -206,7 +206,7 @@
// CHECK-LABEL: func @generic_with_multiple_tensor_outputs
// CHECK: %{{.*}} = linalg.generic {
// CHECK-SAME: ins({{.*}} : tensor, tensor)
-// CHECK-SAME: outs({{.*}} : tensor, tensor)
+// CHECK-SAME: inits({{.*}} : tensor, tensor)
// CHECK: } -> (tensor, tensor)
// -----
@@ -226,7 +226,7 @@
{
%0 = linalg.generic #trait_broadcast
ins(%arg0 : tensor)
- outs(%arg1 : tensor<3x4xf32>) {
+ inits(%arg1 : tensor<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
} -> tensor<3x4xf32>
@@ -251,7 +251,7 @@
%arg1: memref>) {
linalg.generic #trait_3
ins(%arg0 : memref, strided<[?, 1], offset: ?>>)
- outs(%arg1 : memref>)
+ inits(%arg1 : memref>)
attrs = {foo = 1} {
^bb(%a: vector<3x4xi4>, %b: f32) :
%0 = linalg.index 0 : index
@@ -267,7 +267,7 @@
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_2"
// CHECK-SAME: ins({{.*}} : memref, strided<[?, 1], offset: ?>>)
-// CHECK-SAME: outs({{.*}} : memref>)
+// CHECK-SAME: inits({{.*}} : memref>)
// CHECK-SAME: attrs = {foo = 1 : i64} {
// CHECK: ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32):
// CHECK: %{{.*}} = linalg.index 0 : index
@@ -283,10 +283,10 @@
-> (tensor) {
linalg.batch_matmul ins(%a3, %b3: memref, memref)
- outs(%c3: memref)
+ inits(%c3: memref)
%res1 = linalg.batch_matmul ins(%ta3, %tb3: tensor, tensor)
- outs(%tc3: tensor)
+ inits(%tc3: tensor)
-> tensor
return %res1 : tensor
}
@@ -298,10 +298,10 @@
func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor {
%0 = tensor.empty(%arg0, %arg1) : tensor
- %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg2 : f32) inits(%0 : tensor) -> tensor
return %1 : tensor
}
-// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor) -> tensor
+// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) inits(%{{.+}} : tensor) -> tensor
// -----
@@ -313,7 +313,7 @@
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1)>],
iterator_types = ["parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2, %arg3 : tensor, tensor) {
+ inits(%arg2, %arg3 : tensor, tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
%1 = arith.mulf %b0, %b1 : f32
%2 = arith.addf %1, %b3 : f32
@@ -328,7 +328,7 @@
func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
() {
%0 = arith.constant 0.0: f32
linalg.yield %0: f32
@@ -336,7 +336,7 @@
func.return %add : tensor<64xf32>
}
// CHECK-LABEL: func @map_no_inputs
-// CHECK: linalg.map outs
+// CHECK: linalg.map inits
// CHECK-NEXT: () {
// CHECK-NEXT: arith.constant
// CHECK-NEXT: linalg.yield
@@ -348,7 +348,7 @@
%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -357,7 +357,7 @@
}
// CHECK-LABEL: func @map_binary
// CHECK: linalg.map { arith.addf } ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// -----
@@ -365,7 +365,7 @@
%init: memref<64xf32>) {
linalg.map
ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>)
- outs(%init:memref<64xf32>)
+ inits(%init:memref<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -380,7 +380,7 @@
func.func @map_unary(%input: tensor<64xf32>,
%init: tensor<64xf32>) -> tensor<64xf32> {
%abs = linalg.map
ins(%input:tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%input_elem: f32) {
%0 = math.absf %input_elem: f32
linalg.yield %0: f32
@@ -395,7 +395,7 @@
func.func @map_unary_memref(%input: memref<64xf32>,
%init: memref<64xf32>) {
linalg.map
ins(%input:memref<64xf32>)
- outs(%init:memref<64xf32>)
+ inits(%init:memref<64xf32>)
(%input_elem: f32) {
%0 = math.absf %input_elem: f32
linalg.yield %0: f32
@@ -411,7 +411,7 @@
%init: tensor<16x64xf32>) -> tensor<16x64xf32> {
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in: f32
@@ -421,7 +421,7 @@
}
// CHECK-LABEL: func @reduce
// CHECK: linalg.reduce { arith.addf } ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions = [1]
// -----
@@ -430,7 +430,7 @@
%init: memref<16x64xf32>) {
linalg.reduce
ins(%input:memref<16x32x64xf32>)
- outs(%init:memref<16x64xf32>)
+ inits(%init:memref<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in: f32
@@ -440,7 +440,7 @@
}
// CHECK-LABEL: func @reduce
// CHECK: linalg.reduce { arith.addf } ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions = [1]
// -----
@@ -450,7 +450,7 @@
%init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
dimensions = [1]
(%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
%0 = arith.addf %in1, %out1: f32
@@ -470,7 +470,7 @@
%init2: memref<16x64xi64>) {
linalg.reduce
ins(%input1, %input2 : memref<16x32x64xf32>, memref<16x32x64xi64>)
- outs(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
+ inits(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
dimensions = [1]
(%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
%0 = arith.addf %in1, %out1: f32
@@ -489,13 +489,13 @@
%init: tensor<32x64x16xf32>) -> tensor<32x64x16xf32> {
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 2, 0]
func.return %transpose : tensor<32x64x16xf32>
}
// CHECK-LABEL: func @transpose
// CHECK: linalg.transpose ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: permutation
// -----
@@ -504,7 +504,7 @@
%init: memref<32x64x16xf32>) {
linalg.transpose
ins(%input:memref<16x32x64xf32>)
- outs(%init:memref<32x64x16xf32>)
+ inits(%init:memref<32x64x16xf32>)
permutation = [1, 2, 0]
func.return
}
@@ -516,13 +516,13 @@
%init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
%bcast = linalg.broadcast
ins(%input:tensor<8x32xf32>)
- outs(%init:tensor<8x16x32xf32>)
+ inits(%init:tensor<8x16x32xf32>)
dimensions = [1]
func.return %bcast : tensor<8x16x32xf32>
}
// CHECK-LABEL: func @broadcast_static_sizes
// CHECK: linalg.broadcast ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions
// -----
@@ -532,13 +532,13 @@
-> tensor<8x16x?xf32> {
%bcast = linalg.broadcast
ins(%input:tensor<8x?xf32>)
- outs(%init:tensor<8x16x?xf32>)
+ inits(%init:tensor<8x16x?xf32>)
dimensions = [1]
func.return %bcast : tensor<8x16x?xf32>
}
// CHECK-LABEL: func @broadcast_with_dynamic_sizes
// CHECK: linalg.broadcast ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions
// -----
@@ -547,14 +547,14 @@
%init: memref<8x16x32xf32>) {
linalg.broadcast
ins(%input:memref<8x32xf32>)
- outs(%init:memref<8x16x32xf32>)
+ inits(%init:memref<8x16x32xf32>)
dimensions = [1]
func.return
}
// CHECK-LABEL: func @broadcast_memref
// CHECK: linalg.broadcast ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions
// -----
@@ -563,7 +563,7 @@
%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem fastmath : f32
linalg.yield %0: f32
@@ -575,7 +575,7 @@
// CHECK-NEXT: %[[MAPPED:.*]] = linalg.map
// CHECK-SAME: { arith.addf {fastmath = #arith.fastmath} }
// CHECK-SAME: ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-NEXT: return %[[MAPPED]] : tensor<64xf32>
// -----
@@ -584,7 +584,7 @@
%init: tensor<16x64xf32>) -> tensor<16x64xf32> {
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in fastmath : f32
@@ -596,6 +596,6 @@
// CHECK-NEXT: %[[REDUCED:.*]] = linalg.reduce
// CHECK-SAME: { arith.addf {fastmath = #arith.fastmath} }
// CHECK-SAME: ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions = [1]
// CHECK-NEXT: return %[[REDUCED]] : tensor<16x64xf32>
diff --git a/mlir/test/Dialect/Linalg/standard.mlir
--- a/mlir/test/Dialect/Linalg/standard.mlir
+++ b/mlir/test/Dialect/Linalg/standard.mlir
@@ -5,7 +5,7 @@
%arg2: memref) {
linalg.dot ins(%arg0, %arg1: memref>,
memref>)
- outs(%arg2: memref)
+ inits(%arg2: memref)
return
}
// CHECK-LABEL: func @dot(
@@ -46,7 +46,7 @@
func.func @matmul_vec_impl(%A: !matrix_type_A, %B: !matrix_type_B, %C: !matrix_type_C) {
linalg.generic #matmul_trait
ins(%A, %B : !matrix_type_A, !matrix_type_B)
- outs(%C : !matrix_type_C) {
+ inits(%C : !matrix_type_C) {
^bb0(%a: !vector_type_A, %b: !vector_type_B, %c: !vector_type_C):
%d = vector.outerproduct %a, %b, %c: !vector_type_A, !vector_type_B
linalg.yield %d: !vector_type_C
@@ -65,7 +65,7 @@
// expected-error @below {{failed to legalize}}
%0 = linalg.generic {
indexing_maps = [#map, #map1],
iterator_types = ["parallel", "reduction"]}
- ins(%arg0 : tensor) outs(%arg1 : tensor) {
+ ins(%arg0 : tensor) inits(%arg1 : tensor) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor
@@ -76,6 +76,6 @@
func.func @func(%arg0: tensor<4x8xf32>, %arg1: tensor<4x8xf32>) -> tensor<4x8xf32> {
// expected-error @below {{failed to legalize}}
- %0 = linalg.copy ins(%arg0 : tensor<4x8xf32>) outs(%arg1 : tensor<4x8xf32>) -> tensor<4x8xf32>
+ %0 = linalg.copy ins(%arg0 : tensor<4x8xf32>) inits(%arg1 : tensor<4x8xf32>) -> tensor<4x8xf32>
return %0 : tensor<4x8xf32>
}
diff --git a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
--- a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
+++ b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
@@ -4,11 +4,11 @@
// CHECK-SAME: (%[[INIT:.+]]: tensor, %[[OFFSET0:.+]]: index, %[[SIZE1:.+]]: index)
// CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EXT:.+]] = tensor.extract_slice %[[INIT]][%[[OFFSET0]], 8, 4] [1, %[[SIZE1]], 6] [1, 3, 1]
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EXT]] : tensor) -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EXT]] : tensor) -> tensor
// CHECK: return %[[FILL]]
func.func @swap_fill_insert_slice(%init : tensor, %offset0: index, %size1: index) -> tensor {
%f0 = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+ %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
%1 = tensor.extract_slice %0[%offset0, 8, 4] [1, %size1, 6] [1, 3, 1]
: tensor to tensor
return %1: tensor
@@ -21,7 +21,7 @@
// CHECK: tensor.extract_slice
func.func @dont_swap_fill_insert_slice_multi_user(%init : tensor,
%offset0: index, %size1: index) -> (tensor, tensor<2x?x6xf32>) {
%f0 = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+ %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
%1 = tensor.extract_slice %0[%offset0, 8, 4] [2, %size1, 6] [1, 3, 1]
: tensor to tensor<2x?x6xf32>
return %0, %1: tensor, tensor<2x?x6xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -2,7 +2,7 @@
func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
%t0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
- outs(%arg2: tensor)
+ inits(%arg2: tensor)
-> tensor
%c4 = arith.constant 4 : index
@@ -19,7 +19,7 @@
%6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor
%7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32>
%8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor
- %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor
+ %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) inits(%8 : tensor) -> tensor
%10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor
scf.yield %10 : tensor
}
@@ -50,8 +50,8 @@
// slices of the producing matmul.
// CHECK-DAG: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], 4] [1, 1] : tensor to tensor
// CHECK-DAG: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32>
-// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
-// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
+// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) inits(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
+// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) inits(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
// CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]
// -----
@@ -66,12 +66,12 @@
%cst = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<1x112x112x32xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
- outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
%for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
@@ -87,7 +87,7 @@
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]
}
- ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) outs(%2 : tensor<1x8x16x4xf32>) {
+ ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) inits(%2 : tensor<1x8x16x4xf32>) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%result = arith.addf %arg3, %arg4 : f32
linalg.yield %result : f32
@@ -110,7 +110,7 @@
// CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
-// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
// CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
// CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
@@ -124,10 +124,10 @@
// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>)
-// CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME: inits(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
// CHECK-NEXT: %[[ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>)
-// CHECK-SAME: outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME: inits(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
// CHECK: tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]
// -----
@@ -148,12 +148,12 @@
%oc = tensor.dim %elementwise, %c3 : tensor
%init = tensor.empty(%n, %oh, %ow, %oc) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
%for0 = scf.for %iv0 = %c0 to %n step %c8 iter_args(%arg0 = %fill) -> tensor {
%for1 = scf.for %iv1 = %c0 to %oh step %c16 iter_args(%arg1 = %arg0) -> tensor {
@@ -174,7 +174,7 @@
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]
}
- ins(%0, %1 : tensor, tensor) outs(%2 : tensor) {
+ ins(%0, %1 : tensor, tensor) inits(%2 : tensor) {
^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
%result = arith.addf %arg4, %arg5 : f32
linalg.yield %result : f32
@@ -217,7 +217,7 @@
// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor
// CHECK: %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor
// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor
// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor
@@ -256,10 +256,10 @@
// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_2]]]
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_FILL]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[ST_FILL]] : tensor) -> tensor
// CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_ARG]] : tensor)
+// CHECK-SAME: inits(%[[ST_ARG]] : tensor)
 // CHECK: tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
@@ -301,7 +301,7 @@
     tensor.yield %zero : f32
   } : tensor<58x1xf32> to tensor<64x128xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
+  %fill = linalg.fill ins(%zero : f32) inits(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
   %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
     %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
@@ -311,7 +311,7 @@
       %add = linalg.generic
         {indexing_maps = [#map, #map, #map],
         iterator_types = ["parallel", "parallel"]}
-        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) outs(%2 : tensor<16x32xf32>) {
+        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) inits(%2 : tensor<16x32xf32>) {
       ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
         %result = arith.addf %arg4, %arg5 : f32
         linalg.yield %result : f32
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -5,7 +5,7 @@
 // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (d0 + s0 - 1)>
 func.func @conv(%arg0 : memref, %arg1 : memref, %arg2 : memref) {
-  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) outs(%arg2 : memref)
+  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) inits(%arg2 : memref)
   return
 }
@@ -38,4 +38,4 @@
 // CHECK-DAG: %[[SVOUT:.*]] = memref.subview %[[ARG2]][%[[I]], %[[J]]] [%[[T4]], %[[T5]]]
 // CHECK: linalg.conv_2d
 // CHECK-SAME: ins(%[[SVIN]], %[[SVKER]]
-// CHECK-SAME: outs(%[[SVOUT]]
+// CHECK-SAME: inits(%[[SVOUT]]
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -3,7 +3,7 @@
 func.func @indexed_vector(%arg0: memref<50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i) -> (i)>],
                   iterator_types = ["parallel"]}
-    outs(%arg0 : memref<50xindex>) {
+    inits(%arg0 : memref<50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     linalg.yield %i : index
@@ -31,7 +31,7 @@
 func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i, j) -> (i, j)>],
                   iterator_types = ["parallel", "parallel"]}
-    outs(%arg0 : memref<50x50xindex>) {
+    inits(%arg0 : memref<50x50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     %j = linalg.index 1 : index
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -14,13 +14,13 @@
 // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor)
-// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[sTC]] : tensor) -> tensor
 // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor
 // CHECK: scf.yield %[[TD]] : tensor
 // CHECK: scf.yield %[[TD2]] : tensor
 // CHECK: scf.yield %[[TD1]] : tensor
   %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-               outs(%arg2: tensor)
+               inits(%arg2: tensor)
     -> tensor
 // CHECK: return %[[TD0]] : tensor
@@ -50,7 +50,7 @@
                      affine_map<(d0, d1, d2) -> (d2, d1, d0)>],
     iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0, %arg1 : tensor, tensor)
-    outs(%3 : tensor) {
+    inits(%3 : tensor) {
   ^bb0(%arg2 : f32, %arg3: f32, %arg4: f32):
     %5 = arith.addf %arg2, %arg3 : f32
     linalg.yield %5 : f32
@@ -76,7 +76,7 @@
 // CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor to tensor
 // CHECK: %[[STRETURN:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor, tensor)
-// CHECK-SAME: outs(%[[STARG2]] : tensor)
+// CHECK-SAME: inits(%[[STARG2]] : tensor)
 // CHECK: %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]]
 // CHECK: scf.yield %[[TD]]
 // CHECK: }
@@ -121,7 +121,7 @@
                      affine_map<(d0, d1, d2) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%1, %arg2 : tensor, tensor)
-    outs(%arg1 : tensor) {
+    inits(%arg1 : tensor) {
   ^bb0(%arg3 : f32, %arg4: f32, %arg5: f32):
     %5 = arith.addf %arg3, %arg5 : f32
     linalg.yield %5 : f32
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -19,14 +19,14 @@
 // CHECK: %[[tC:.*]] = tensor.extract_slice %[[C_BLK]]{{.*}} : tensor to tensor
 // CHECK: %[[RES:.*]] = linalg.matmul
 // CHECK-SAME: ins(%[[tA]], %[[tB]] : tensor, tensor)
-  // CHECK-SAME: outs(%[[tC]] : tensor) -> tensor
+  // CHECK-SAME: inits(%[[tC]] : tensor) -> tensor
 // CHECK: scf.forall.in_parallel {
 // CHECK-NEXT: tensor.parallel_insert_slice %[[RES]] into %[[C_BLK]]{{.*}} :
 // CHECK-SAME: tensor into tensor
 // CHECK-NEXT: }
 // CHECK-NEXT: } {mapping = [#gpu.thread, #gpu.thread]}
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -68,7 +68,7 @@
   %tile_size_1 = "test.dummy"() : () -> (index)
   %tile_size_2 = "test.dummy"() : () -> (index)
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -107,7 +107,7 @@
 // CHECK: scf.forall.in_parallel
 // CHECK-NEXT: tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                  outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                  inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -148,7 +148,7 @@
 // CHECK: scf.forall.in_parallel
 // CHECK-NEXT: tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -184,7 +184,7 @@
 // CHECK: scf.forall.in_parallel
 // CHECK-NEXT: tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                  outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                  inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -202,7 +202,7 @@
   %result = linalg.generic {indexing_maps = [
     affine_map<(d0) -> (d0)>,affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
-    ins(%A : tensor<4xf32>) outs(%B1 : tensor<4xf32>) {
+    ins(%A : tensor<4xf32>) inits(%B1 : tensor<4xf32>) {
   ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
     %2 = arith.addf %arg3, %arg3 : f32
     linalg.yield %2 : f32
@@ -256,7 +256,7 @@
 // CHECK-NEXT: tensor.parallel_insert_slice
   %tile_size = "test.dummy"() : () -> (index)
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -304,7 +304,7 @@
     affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]
   } ins(%IN1, %IN2 : tensor<100xf32>, tensor<100xf32>)
-    outs(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
+    inits(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
   {
   ^bb0(%a1: f32, %a2: f32, %a3: f32, %a4: f32):
     %1 = arith.addf %a1, %a3 : f32
@@ -356,7 +356,7 @@
     ], iterator_types = ["parallel", "parallel"]
   } ins(%IN1, %IN2, %IN3 : tensor<100xf32>, tensor<100x300xf32>, tensor<300xf32>)
-    outs(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
+    inits(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
   ^bb0(%i1: f32, %i2: f32, %i3: f32, %o1: f32, %o2: f32):
     %1 = arith.addf %i1, %o1 : f32
     %2 = arith.addf %i2, %1 : f32
diff --git a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
--- a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
@@ -12,7 +12,7 @@
 // CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<17x8x2x32x16x16xf32>
 // CHECK: linalg.transpose
 // CHECK-SAME: ins(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
+  // CHECK-SAME: inits(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
 // CHECK-SAME: permutation = [0, 2, 4, 5, 3, 1]
   %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<129x47x16x16xf32> -> tensor<17x2x16x16x32x8xf32>
@@ -36,7 +36,7 @@
 // CHECK: tensor.empty() : tensor<17x8x2x32x16x16xf32>
 // CHECK: linalg.transpose
 // CHECK-SAME: ins(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
+  // CHECK-SAME: inits(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
 // CHECK-SAME: permutation = [0, 5, 1, 4, 2, 3]
 // CHECK: tensor.collapse_shape {{.*}}[0, 1], [2, 3], [4], [5]]
 // CHECK-SAME: : tensor<17x8x2x32x16x16xf32> into tensor<136x64x16x16xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -12,7 +12,7 @@
 // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$map]]()[%[[h1]], %[[dim0]]]
 // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$map1]]()[%[[l2]], %[[h2]]]
 // CHECK: %[[alloc:.*]] = memref.alloc(%[[size0]], %[[size1]]) : memref
-// CHECK: linalg.fill ins(%[[c50]] : index) outs(%[[alloc]] : memref)
+// CHECK: linalg.fill ins(%[[c50]] : index) inits(%[[alloc]] : memref)
 // CHECK: %[[dim0:.*]] = tensor.dim %[[t]], %[[c0]]
 // CHECK: %[[subview:.*]] = memref.subview %[[alloc]][5, %[[l2]]] [%[[dim0]], 10] [1, 1]
 // CHECK: memref.tensor_store %[[t]], %[[subview]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -47,11 +47,11 @@
 // CHECK: %[[SLICERES:.+]] = tensor.extract_slice %[[RES]]
 // CHECK: %[[OPRES:.+]] = linalg.depthwise_conv_1d_nwc_wc
 // CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]]
-  // CHECK-SAME: outs(%[[SLICERES]]
+  // CHECK-SAME: inits(%[[SLICERES]]
 // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[OPRES]] into %[[RES]]
   %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
         ins(%input, %filter: tensor<1x1x113x96xf32>, tensor<1x3x96xf32>)
-        outs(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
+        inits(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
 // CHECK: %[[INSERTED]]
   return %0: tensor<1x1x56x96xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -12,7 +12,7 @@
 func.func @fuse_tileable_op(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
   %cst = arith.constant 4.200000e+01 : f32
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) inits(%arg1 : tensor) -> tensor
   %d0 = tensor.dim %arg1, %c0 : tensor
   %1 = affine.apply #map0()[%d0, %arg0]
@@ -23,11 +23,11 @@
     %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T0:.*]] = tensor.extract_slice %[[IN]][%{{.*}}] [%{{.*}}] [{{.*}}]
-    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
     %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]
-    %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+    %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
     }
@@ -74,7 +74,7 @@
     %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor
 // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[INIT_TENSOR]]
-    %7 = linalg.elemwise_unary ins(%0 : tensor) outs(%5 : tensor) -> tensor
+    %7 = linalg.elemwise_unary ins(%0 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor<64xf32>
     }
@@ -108,7 +108,7 @@
 func.func @fuse_tileable_op_rank_reducing(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
   %cst = arith.constant 4.200000e+01 : f32
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
   %d0 = tensor.dim %arg1, %c0 : tensor
 // CHECK: scf.forall {{.*}} -> (tensor) {
@@ -116,7 +116,7 @@
     %5 = tensor.extract_slice %o[%arg3] [1] [1] : tensor to tensor
 // CHECK: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor to tensor<1xf32>
-    // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : tensor<1xf32>) -> tensor<1xf32>
+    // CHECK: linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : tensor<1xf32>) -> tensor<1xf32>
 // CHECK: tensor.extract_slice %{{.*}}[0] [1] [1] : tensor<1xf32> to tensor
 // CHECK: func.call @foo(%{{.*}}) : (tensor) -> tensor
     %7 = func.call @foo(%5) : (tensor) -> tensor
@@ -154,7 +154,7 @@
 func.func @fuse_tileable_op_through_bbarg(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
   %cst = arith.constant 4.200000e+01 : f32
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
   %d0 = tensor.dim %arg1, %c0 : tensor
   %1 = affine.apply #map0()[%d0, %arg0]
@@ -165,11 +165,11 @@
     %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T0:.*]] = tensor.extract_slice %[[BBARGOUT]][%{{.*}}] [%{{.*}}] [{{.*}}]
-    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
     %6 = tensor.extract_slice %arg1[%3] [%4] [1] : tensor to tensor
-    // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} outs(%[[T1]]
-    %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+    // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} inits(%[[T1]]
+    %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
     }
@@ -208,7 +208,7 @@
   %0:2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]
-  } ins(%in : tensor) outs(%out_1, %out_3 : tensor, tensor) {
+  } ins(%in : tensor) inits(%out_1, %out_3 : tensor, tensor) {
   ^bb0(%a: f32, %b: f32, %c: f32):
     %d = arith.addf %a, %b : f32
     %e = arith.addf %d, %c : f32
@@ -229,7 +229,7 @@
     %6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]#0
-    %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+    %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
     }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -9,9 +9,9 @@
 // CHECK: linalg.elemwise_binary
 // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -36,9 +36,9 @@
 // CHECK: linalg.elemwise_binary
 // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -66,18 +66,18 @@
 // CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
 // CHECK: %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
 // CHECK: %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK: %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor)
+// CHECK: %[[FILL:.+]] = linalg.fill {{.+}} inits(%[[OUT_SLICE1]] : tensor)
 // CHECK: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
 // CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK: %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
-// CHECK: linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) outs(%[[OUT_SLICE2]] : tensor)
+// CHECK: linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) inits(%[[OUT_SLICE2]] : tensor)
 // CHECK: return %[[RES]]
-  %fill = linalg.fill ins(%five : f32) outs(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
+  %fill = linalg.fill ins(%five : f32) inits(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                      affine_map<(d0, d1, d2) -> (d0, d2)>],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input : tensor<12x7x25xf32>) outs(%fill : tensor<12x25xf32>) {
+  } ins(%input : tensor<12x7x25xf32>) inits(%fill : tensor<12x25xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):
     %2 = arith.addf %arg0, %arg1 : f32
     linalg.yield %2 : f32
@@ -105,7 +105,7 @@
   %1 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<16x48x8x8xf32> -> tensor<128x384xf32>
   %2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>)
-                             outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
+                             inits(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
   return %2 : tensor<128x384xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
@@ -6,7 +6,7 @@
 // CHECK-NOT: linalg.elemwise_unary
 // CHECK: linalg.generic
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
@@ -5,7 +5,7 @@
   -> tensor<24x25xf32>
 {
   // expected-note @below {{payload operation}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -40,7 +40,7 @@
   -> tensor<24x25xf32>
 {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -82,7 +82,7 @@
 // CHECK: %[[PADDED:.*]] = tensor.extract_slice %[[PACKED]][%{{.*}}, 0, 0] [1, 5, 12] [1, 1, 1]
 // CHECK-SAME: : tensor<5x5x12xf32> to tensor<5x12xf32>
 // CHECK: linalg.matmul ins(%[[PADDED]]
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -126,7 +126,7 @@
 // CHECK: %[[TRANSPOSED:.*]] = linalg.generic
 // CHECK: -> tensor<5x12xf32>
 // CHECK: linalg.matmul ins(%[[TRANSPOSED]]
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -162,13 +162,13 @@
 // CHECK: %[[PADDED:.*]] = tensor.pad %{{.*}}
 // CHECK: : tensor to tensor<5x25xf32>
 // CHECK: scf.for %{{.*}} iter_args(%[[INNER_PADDED:[0-9a-zA-Z]*]] = %[[PADDED]]) -> (tensor<5x25xf32>)
-  // CHECK: %[[RES:.*]] = linalg.matmul {{.*}} outs(%[[INNER_PADDED]]
+  // CHECK: %[[RES:.*]] = linalg.matmul {{.*}} inits(%[[INNER_PADDED]]
 // CHECK-SAME: : tensor<5x25xf32>
 // CHECK: scf.yield %[[RES]] : tensor<5x25xf32>
 // CHECK: %[[CAST:.*]] = tensor.cast %{{.*}} : tensor<5x25xf32> to tensor
 // CHECK: tensor.insert_slice %[[CAST]] into %{{.*}}[%{{.*}}, 0] [%{{.*}}, 25] [1, 1]
 // CHECK-SAME: : tensor into tensor<24x25xf32>
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } ins(%arg0 : tensor) outs(%arg1 : tensor) {
+  } ins(%arg0 : tensor) inits(%arg1 : tensor) {
   ^bb0(%arg2: f32, %arg3: f32):
     %1 = math.exp %arg2 : f32
     linalg.yield %1 : f32
@@ -28,7 +28,7 @@
 func.func @interchange_matmul(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-match.mlir b/mlir/test/Dialect/Linalg/transform-op-match.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-match.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-match.mlir
@@ -48,7 +48,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
-    outs(%0 : tensor<128x12x32xf32>) {
+    inits(%0 : tensor<128x12x32xf32>) {
   ^bb0(%arg1: f32, %arg2: f32):
     linalg.yield %arg1 : f32
   } -> tensor<128x12x32xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
@@ -13,7 +13,7 @@
     %arg0: tensor<13x34xf32>, %arg1: tensor<34x42xf32>, %arg2: tensor<13x42xf32>)
   -> tensor<13x42xf32> {
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
-                     outs(%arg2: tensor<13x42xf32>)
+                     inits(%arg2: tensor<13x42xf32>)
                      -> tensor<13x42xf32>
   // The first application computes the total size.
 // CHECK: %{{.*}} = affine.apply #[[$MAP13]]()
@@ -45,7 +45,7 @@
     %arg0: tensor<13x34xf32>, %arg1: tensor<34x42xf32>, %arg2: tensor<13x42xf32>)
   -> tensor<13x42xf32> {
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
-                     outs(%arg2: tensor<13x42xf32>)
+                     inits(%arg2: tensor<13x42xf32>)
                      -> tensor<13x42xf32>
   return %0 : tensor<13x42xf32>
@@ -86,7 +86,7 @@
     %arg0: tensor, %arg1: tensor, %arg2: tensor)
   -> tensor {
   %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
                      -> tensor
   return %0 : tensor
@@ -107,7 +107,7 @@
   -> tensor {
   // expected-note @below {{payload op}}
   %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
                      -> tensor
   return %0 : tensor
diff --git a/mlir/test/Dialect/Linalg/transform-op-pack.mlir b/mlir/test/Dialect/Linalg/transform-op-pack.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pack.mlir
@@ -22,8 +22,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor<3x2x4xf16>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<3xf16>)
-  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor<3x7xf16>) outs(%t1 : tensor<3xf16>) {
+  // CHECK-SAME: inits(%{{.*}} : tensor<3xf16>)
+  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor<3x7xf16>) inits(%t1 : tensor<3xf16>) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -64,8 +64,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["reduction", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor<3x2x4xf16>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<3xf16>)
-  %2 = linalg.generic #col_reduction_2d_trait ins(%t0 : tensor<7x3xf16>) outs(%t1 : tensor<3xf16>) {
+  // CHECK-SAME: inits(%{{.*}} : tensor<3xf16>)
+  %2 = linalg.generic #col_reduction_2d_trait ins(%t0 : tensor<7x3xf16>) inits(%t1 : tensor<3xf16>) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -119,8 +119,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
-  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) outs(%t1 : tensor) {
+  // CHECK-SAME: inits(%{{.*}} : tensor)
+  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) inits(%t1 : tensor) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -165,8 +165,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
-  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) outs(%t1 : tensor) {
+  // CHECK-SAME: inits(%{{.*}} : tensor)
+  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) inits(%t1 : tensor) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -209,9 +209,9 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
+  // CHECK-SAME: inits(%{{.*}} : tensor)
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
 // CHECK: tensor.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2]
@@ -258,9 +258,9 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor<14x64x28x28x8xf32>, tensor<256x64x1x1x4x8xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<14x256x28x28x4xf32>)
+  // CHECK-SAME: inits(%{{.*}} : tensor<14x256x28x28x4xf32>)
   %0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>)
-                                outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
+                                inits(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
 // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4]
 // CHECK-SAME: : tensor<14x256x28x28x4xf32> -> tensor<14x1024x28x28xf32>
@@ -298,7 +298,7 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor, tensor<1x?x?x?x4x6xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
+  // CHECK-SAME: inits(%{{.*}} : tensor)
   %0 = linalg.conv_2d_nhwc_hwcf ins (%input, %filter: tensor, tensor<1x?x?x?xf32>)
                                 outs (%init: tensor) -> tensor
@@ -344,9 +344,9 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
+  // CHECK-SAME: inits(%{{.*}} : tensor)
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
 // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]]
@@ -367,7 +367,7 @@
 func.func @conv_cant_pack(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1x1xf32>, %o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> {
   %0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>)
-                                outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
+                                inits(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
   return %0: tensor<14x1024x28x28xf32>
 }
@@ -385,10 +385,10 @@
 func.func @matmul(%A: tensor, %B: tensor, %C: tensor)
     -> (tensor, tensor) {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   %1 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0, %1 : tensor, tensor
 }
@@ -407,7 +407,7 @@
 func.func @matmul(%A: tensor, %B: tensor, %C: tensor) -> tensor {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0 : tensor
 }
@@ -484,7 +484,7 @@
   %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
   %f0 = arith.constant 0.0 : f32
   %1 = tensor.empty() : tensor
-  %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor
+  %2 = linalg.fill ins(%f0: f32) inits(%1 : tensor) -> tensor
   return
 }
@@ -506,7 +506,7 @@
   %b = tensor.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
   %f0 = arith.constant 0.0 : f32
   %1 = tensor.empty() : tensor
-  %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor
+  %2 = linalg.fill ins(%f0: f32) inits(%1 : tensor) -> tensor
   return
 }
@@ -526,7 +526,7 @@
 func.func @no_matching_pack(%source: tensor<16xf32>) {
   %f0 = arith.constant 0.0 : f32
   %1 = tensor.empty() : tensor<4x4xf32>
-  %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %2 = linalg.fill ins(%f0: f32) inits(%1 : tensor<4x4xf32>) -> tensor<4x4xf32>
   %b = tensor.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32>
   return
 }
@@ -547,7 +547,7 @@
 func.func @invalid_outer_perm(%A: tensor, %B: tensor, %C: tensor)
     -> tensor {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0 : tensor
 }
@@ -573,7 +573,7 @@
 func.func @invalid_inner_perm(%A: tensor, %B: tensor, %C: tensor)
     -> tensor {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -25,8 +25,8 @@
 // CHECK: %[[T5:.*]] = linalg.matmul
 // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
-  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  // CHECK-SAME: inits(%[[T2]] : tensor<4x5xf32>)
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
@@ -67,8 +67,8 @@
 // CHECK: %[[T5:.*]] = linalg.matmul
 // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
-  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  // CHECK-SAME: inits(%[[T2]] : tensor<4x5xf32>)
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
@@ -89,7 +89,7 @@
     %arg1: tensor<12x25xf32>,
     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -110,7 +110,7 @@
     %arg1: tensor<12x25xf32>,
     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -132,7 +132,7 @@
     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // This is attached to an error that is silenceable and is not reported by this transform
   //   {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir b/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir
@@ -78,7 +78,7 @@
 // CHECK: %[[empty:.*]] = tensor.empty(%[[s1]], %[[s2]]) : tensor
 // CHECK: %[[generic:.*]] = linalg.generic
 // CHECK-SAME: {indexing_maps = [#[[$map]]], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: outs(%[[empty]] : tensor) {
+// CHECK-SAME: inits(%[[empty]] : tensor) {
 // CHECK: %[[i0:.*]] = linalg.index 0
 // CHECK: %[[i1:.*]] = linalg.index 1
 // CHECK: %[[added:.*]] = arith.addi %[[i0]], %[[i1]]
@@ -116,7 +116,7 @@
 // CHECK: %[[empty:.*]] = tensor.empty(%[[size0]], %[[size1]]) : tensor
 // CHECK: %[[generic:.*]] = linalg.generic
 // CHECK-SAME: {indexing_maps = [#[[$map2]]], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: outs(%[[empty]] : tensor) {
+// CHECK-SAME: inits(%[[empty]] : tensor) {
 // CHECK: %[[i0:.*]] = linalg.index 0
 // CHECK: %[[i1:.*]] = linalg.index 1
 // CHECK: %[[mul:.*]] = arith.muli %[[i0]], %[[i1]]
@@ -155,7 +155,7 @@
 // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$map]]()[%[[h1]], %[[dim0]]]
 // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$map1]]()[%[[l2]], %[[h2]]]
 // CHECK: %[[empty:.*]] = tensor.empty(%[[size0]], %[[size1]]) : tensor
-// CHECK: %[[filled:.*]] = linalg.fill ins(%[[c50]] : index) outs(%[[empty]] : tensor)
+// CHECK: %[[filled:.*]] = linalg.fill ins(%[[c50]] : index) inits(%[[empty]] : tensor)
 // CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[t1]], %[[c0]]
 // CHECK: %[[inserted:.*]] = tensor.insert_slice %[[t1]] into %[[filled]][5, %[[l2]]] [%[[dim0]], 10] [1, 1] : tensor into tensor
 // CHECK: return %[[inserted]]
@@ -188,7 +188,7 @@
 // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$map]]()[%[[h1]], %[[dim0]]]
 // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$map1]]()[%[[l2]], %[[h2]]]
 // CHECK: %[[empty:.*]] = tensor.empty(%[[size0]], %[[size1]]) : tensor
-// CHECK: %[[filled:.*]] = linalg.fill ins(%[[padding]] : index) outs(%[[empty]] : tensor)
+// CHECK: %[[filled:.*]] = linalg.fill ins(%[[padding]] : index) inits(%[[empty]] : tensor)
 // CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[t1]], %[[c0]]
 // CHECK: %[[inserted:.*]] = tensor.insert_slice %[[t1]] into %[[filled]][5, %[[l2]]] [%[[dim0]], 10] [1, 1] : tensor into tensor
 // CHECK: return %[[inserted]]
@@ -217,7 +217,7 @@
 // CHECK-NOT: generic
 // CHECK-NOT: insert_slice
 // CHECK: %[[alloc_tensor:.*]] = bufferization.alloc_tensor(%{{.*}}) : tensor
-// CHECK: %[[copied:.*]] = linalg.copy ins(%[[t1]] : tensor) outs(%[[alloc_tensor]] : tensor) -> tensor
+// CHECK: %[[copied:.*]] = linalg.copy ins(%[[t1]] : tensor) inits(%[[alloc_tensor]] : tensor) -> tensor
 // CHECK: return %[[copied]]
 func.func @tensor_pad_nofold(%t1: tensor, %padding: index) -> tensor {
diff --git a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
@@ -12,7 +12,7 @@
 // CHECK: scf.yield %[[INS_2]] : tensor
 // CHECK: %[[INS_1:.*]] = tensor.insert_slice %[[RES_LOOP_2]] into %{{.*}}, 25] [1, 1] : tensor into tensor<24x25xf32>
 // CHECK: scf.yield %[[INS_1]] : tensor<24x25xf32>
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
 // CHECK: return %[[RES_LOOP_1]] : tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
@@ -7,14 +7,14 @@
 // CHECK: linalg.generic
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}} : tensor, tensor<256x32xf32>, tensor<64x4xi1>)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor) {
 // CHECK: linalg.generic
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor) {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor<256x32xf32>)
-                     outs(%C: tensor) -> tensor
+                     inits(%C: tensor) -> tensor
   return %0: tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -2,7 +2,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -16,16 +16,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -45,7 +45,7 @@
                                           affine_map<(d0) -> ()>],
                          iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor)
-  outs(%out : tensor) {
+  inits(%out : tensor) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -64,16 +64,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor
@@ -97,7 +97,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %3 = arith.addf %arg0, %arg1 : f32
     %4 = arith.maxf %3, %arg2 : f32
@@ -116,15 +116,15 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) inits(%[[F]] : tensor<5x2x4xf32>) {
 // CHECK: arith.addf
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) {
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2xf32>
@@ -140,7 +140,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -154,16 +154,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -183,7 +183,7 @@
                                           affine_map<(d0) -> ()>],
                          iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor)
-  outs(%out : tensor) {
+  inits(%out : tensor) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -202,16 +202,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor
@@ -235,7 +235,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %3 = arith.addf %arg0, %arg1 : f32
     %4 = arith.minf %3, %arg2 : f32
@@ -254,15 +254,15 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) inits(%[[F]] : tensor<5x2x4xf32>) {
 // CHECK: arith.addf
 // CHECK: arith.minf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) {
 // CHECK: arith.minf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -17,7 +17,7 @@
   // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [42] [1] : tensor<100xf32> to tensor<42xf32>
   // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_LOW]]
-  // CHECK: outs(%[[OUT_SLICE_LOW]]
+  // CHECK: inits(%[[OUT_SLICE_LOW]]
   // CHECK: linalg.index 0
   // CHECK: func.call @elem
   // CHECK: %[[RES_PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [42] [1]
@@ -26,7 +26,7 @@
   // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[RES_PARTIAL]][42] [58] [1] : tensor<100xf32> to tensor<58xf32>
   // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_HIGH]]
-  // CHECK: outs(%[[OUT_SLICE_HIGH]]
+  // CHECK: inits(%[[OUT_SLICE_HIGH]]
   // CHECK: %[[IDX:.+]] = linalg.index 0
   // CHECK: affine.apply #[[$ADD_42_MAP]](%[[IDX]])
   // CHECK: func.call @elem
@@ -35,7 +35,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%0: f32, %1: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32
@@ -63,14 +63,14 @@
   // the splitting altogether.
   // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
   // CHECK: ins(%[[IN]]
-  // CHECK: outs(%[[OUT]]
+  // CHECK: inits(%[[OUT]]
   // CHECK: linalg.index 0
   // CHECK: func.call @elem
   %0 = linalg.generic {
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<10xf32>) outs(%arg1: tensor<10xf32>) {
+  ins(%arg0: tensor<10xf32>) inits(%arg1: tensor<10xf32>) {
   ^bb0(%0: f32, %1: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32
@@ -102,7 +102,7 @@
   // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor
   // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_LOW]]
-  // CHECK: outs(%[[OUT_SLICE_LOW]]
+  // CHECK: inits(%[[OUT_SLICE_LOW]]
   // CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1]
   //
   // CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
@@ -111,7 +111,7 @@
   // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor
   // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_HIGH]]
-  // CHECK: outs(%[[OUT_SLICE_HIGH]]
+  // CHECK: inits(%[[OUT_SLICE_HIGH]]
   // CHECK: %[[SPLIT_HIGH_4:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
   // CHECK: tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[PARTIAL]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_4]]] [1]
   %0 = func.call @get_size() : () -> index
@@ -119,7 +119,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
     %5 = arith.addf %3, %4 : f32
     linalg.yield %5 : f32
@@ -147,7 +147,7 @@
   // CHECK: %[[OUT_1:.+]] = tensor.extract_slice %[[OUT:.+]][0, 0]
   // CHECK: %[[RES_1:.+]] = linalg.generic
   // CHECK-SAME: ins(%[[IN_1]] : tensor<4x34xf32>)
-  // CHECK-SAME: outs(%[[OUT_1]] : tensor<4x34xf32>)
+  // CHECK-SAME: inits(%[[OUT_1]] : tensor<4x34xf32>)
   // CHECK: %[[PARTIAL_1:.+]] = tensor.insert_slice %[[RES_1]] into %[[OUT]]
   //
   // CHECK: %[[IN_2:.+]] = tensor.extract_slice %[[IN]]
@@ -158,14 +158,14 @@
   // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
   // CHECK: %[[RES_21:.+]] = linalg.generic
   // CHECK-SAME: ins(%[[IN_21]] : tensor<6x16xf32>)
-  // CHECK-SAME: outs(%[[OUT_21]] : tensor<6x16xf32>)
+  // CHECK-SAME: inits(%[[OUT_21]] : tensor<6x16xf32>)
   // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
   //
   // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
   // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
   // CHECK: %[[RES_22:.+]] = linalg.generic
   // CHECK-SAME: ins(%[[IN_22]] : tensor<6x18xf32>)
-  // CHECK-SAME: outs(%[[OUT_22]] : tensor<6x18xf32>)
+  // CHECK-SAME: inits(%[[OUT_22]] : tensor<6x18xf32>)
   // CHECK: %[[PARTIAL_22:.+]] = tensor.insert_slice %[[RES_22]] into %[[PARTIAL_21]]
   // CHECK: %[[PARTIAL_2:.+]] = tensor.insert_slice %[[PARTIAL_22]] into %[[PARTIAL_1]]
   %0 = linalg.generic {
@@ -174,7 +174,7 @@
     iterator_types = ["parallel", "parallel"]
   }
   ins(%arg0: tensor<10x34xf32>)
-  outs(%arg1: tensor<10x34xf32>) {
+  inits(%arg1: tensor<10x34xf32>) {
   ^bb0(%0: f32, %1: f32):
     %i = linalg.index 0 : index
     %j = linalg.index 1 : index
@@ -211,7 +211,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
     linalg.yield %3 : f32
   } -> tensor<100xf32>
@@ -235,7 +235,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
     linalg.yield %3 : f32
   } -> tensor<100xf32>
@@ -271,7 +271,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%0: f32, %1: f32):
     linalg.yield %0 : f32
   } -> tensor<100xf32>
@@ -299,7 +299,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%arg4: f32, %arg5: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%arg4, %i, %i) : (f32, index, index) -> f32
@@ -310,7 +310,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg2: tensor<200xf32>) outs(%arg3: tensor<200xf32>) {
+  ins(%arg2: tensor<200xf32>) inits(%arg3: tensor<200xf32>) {
   ^bb0(%arg4: f32, %arg5: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%arg4, %i, %i) : (f32, index, index) -> f32
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
@@ -21,13 +21,13 @@
 // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32>
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32>
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-SAME: inits(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32>
 // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32>
 // CHECK: scf.yield %[[TD]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32>
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
-                     outs(%arg2: tensor<128x128xf32>)
+                     inits(%arg2: tensor<128x128xf32>)
                      -> tensor<128x128xf32>
 // CHECK: return %[[TD0]] : tensor<128x128xf32>
@@ -60,14 +60,14 @@
 // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x?xf32>
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor<4x?xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[sTC]] : tensor) -> tensor
 // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor<128x128xf32>
 // CHECK: scf.yield %[[TD]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32>
   %sz = func.call @get_dynamic_tile_size() : () -> index
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
-                     outs(%arg2: tensor<128x128xf32>)
+                     inits(%arg2: tensor<128x128xf32>)
inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: return %[[TD0]] : tensor<128x128xf32> @@ -91,10 +91,10 @@ %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) -> (tensor<128x128xf32>, tensor<128x128xf32>) { %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> %1 = linalg.matmul ins(%0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> return %0, %1 : tensor<128x128xf32>, tensor<128x128xf32> } @@ -116,10 +116,10 @@ %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) -> (tensor<128x128xf32>, tensor<128x128xf32>) { %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> %1 = linalg.matmul ins(%0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> return %0, %1 : tensor<128x128xf32>, tensor<128x128xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -12,7 +12,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } @@ -57,7 +57,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -105,7 +105,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -123,7 +123,7 @@ %arg1: tensor<12x25xf32>, %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { // expected-note @below {{non-isolated target}} - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } diff --git 
a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir --- a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir +++ b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir @@ -15,8 +15,8 @@ // CHECK-SAME: indexing_maps = [#[[$mk_kkmm]], #[[$kn_kknn]], #[[$mn_mmnn]]] // CHECK-SAME: ["reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<128x8x32x8xf32>, tensor<8x8x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<128x8x8x16xf32>) - %0 = linalg.matmul ins(%A, %B : !A_mk, !B_kn) outs(%C : !C_mn) -> !C_mn + // CHECK-SAME: inits(%{{.*}} : tensor<128x8x8x16xf32>) + %0 = linalg.matmul ins(%A, %B : !A_mk, !B_kn) inits(%C : !C_mn) -> !C_mn return %0 : !C_mn } @@ -56,8 +56,8 @@ // CHECK-SAME: indexing_maps = [#[[$mk_kkmm]], #[[$kn_kknn]], #[[$mn_mmnn]]] // CHECK-SAME: ["reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<128x8x32x8xf32>, tensor<8x8x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<8x128x8x16xf32>) - %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) outs(%C : !C_nm) { + // CHECK-SAME: inits(%{{.*}} : tensor<8x128x8x16xf32>) + %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) inits(%C : !C_nm) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 %e = arith.addf %c, %d : f32 @@ -101,8 +101,8 @@ // CHECK-SAME: indexing_maps = [#[[$mk_kkmm]], #[[$kn_kknn]], #[[$mn_mmnn]]] // CHECK-SAME: ["reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<128x8x32x8xf32>, tensor<8x8x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<8x128x8x16xf32>) - %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) outs(%C : !C_nm) { + // CHECK-SAME: inits(%{{.*}} : tensor<8x128x8x16xf32>) + %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) inits(%C : !C_nm) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 %e = arith.addf %c, %d : f32 @@ -146,8 +146,8 @@ // CHECK-SAME: indexing_maps = [#[[$bmkm2_kkmm]], #[[$nkb_kknn]], #[[$nbm_mmnn]]] // CHECK-SAME: ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<42x128x8x33x32x8xf32>, tensor<8x8x42x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<8x42x128x8x16xf32>) - %0 = linalg.generic #mkn_trait ins(%A, %B : !A_bmkm2, !B_nkb) outs(%C : !C_nbm) { + // CHECK-SAME: inits(%{{.*}} : tensor<8x42x128x8x16xf32>) + %0 = linalg.generic #mkn_trait ins(%A, %B : !A_bmkm2, !B_nkb) inits(%C : !C_nbm) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 %e = arith.addf %c, %d : f32 @@ -182,11 +182,11 @@ // CHECK-SAME: indexing_maps = [#[[$M1]], #[[$M2]], #[[$M3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel"] // CHECK-SAME: ins(%{{.*}} : tensor, tensor<1x2x3x3x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor) + // CHECK-SAME: inits(%{{.*}} : tensor) %0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%arg0, %c0: tensor, tensor<16x47x3x3xf32>) - outs(%arg2: tensor) -> tensor + inits(%arg2: tensor) -> tensor return %0 : tensor } @@ -210,11 +210,11 @@ // CHECK: linalg.map %mapped = linalg.map { arith.addf } ins(%arg0, %arg1 : tensor<10x100xf32>, tensor<10x100xf32>) - outs(%map_init : tensor<10x100xf32>) + inits(%map_init : tensor<10x100xf32>) // 
CHECK: linalg.reduce %res = linalg.reduce { arith.addf } ins(%mapped: tensor<10x100xf32>) - outs(%output: tensor<10xf32>) + inits(%output: tensor<10xf32>) dimensions = [1] return %res : tensor<10xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -5,7 +5,7 @@ %v: memref) { linalg.dot ins(%x, %y: memref>, memref>) - outs(%v: memref) + inits(%v: memref) return } @@ -29,7 +29,7 @@ linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -47,7 +47,7 @@ // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -56,7 +56,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) return } @@ -97,7 +97,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -124,7 +124,7 @@ linalg.generic #generic_matmul_trait ins(%A, %B : memref>, memref>) - outs(%C : memref>) { + inits(%C : memref>) { ^bb(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -155,7 +155,7 @@ %y: memref>) { linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -173,7 +173,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -182,7 +182,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) return } @@ -216,4 +216,4 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) diff --git a/mlir/test/Dialect/Linalg/transform-promotion.mlir b/mlir/test/Dialect/Linalg/transform-promotion.mlir --- a/mlir/test/Dialect/Linalg/transform-promotion.mlir +++ b/mlir/test/Dialect/Linalg/transform-promotion.mlir @@ -22,7 +22,7 @@ memref> to memref> linalg.matmul ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -56,7 +56,7 @@ // CHECK: memref.copy %[[s2]], %[[l2]] : memref to memref // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[v1]] : memref, memref) -// CHECK-SAME: outs(%[[v2]] : memref) +// CHECK-SAME: inits(%[[v2]] : memref) transform.sequence failures(propagate) { ^bb0(%arg1: !pdl.operation): @@ -89,7 +89,7 @@ linalg.matmul {__internal_linalg_transform__ = "_promote_first_view_"} ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -116,7 +116,7 @@ // CHECK-NOT: memref.copy // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[s1]] : memref, memref>) -// CHECK-SAME: outs(%[[s2]] : memref>) +// CHECK-SAME: inits(%[[s2]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -138,7 +138,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref> to memref> linalg.fill - ins(%cf : f32) outs(%3 : memref>) + ins(%cf : f32) inits(%3 : memref>) return } // CHECK-LABEL: func @aligned_promote_fill @@ -147,9 +147,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<32000000xi8> // CHECK: 
%[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref> -// CHECK: linalg.fill ins({{.*}} : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins({{.*}} : f32) inits(%[[v0]] : memref) // CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref -// CHECK: linalg.fill ins(%[[cf]] : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins(%[[cf]] : f32) inits(%[[v0]] : memref) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -172,7 +172,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref, strided<[?, 1], offset: ?>> to memref, strided<[?, ?], offset: ?>> linalg.fill ins(%cc : complex) - outs(%3 : memref, strided<[?, ?], offset: ?>>) + inits(%3 : memref, strided<[?, ?], offset: ?>>) return } // CHECK-LABEL: func @aligned_promote_fill_complex @@ -181,9 +181,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<64000000xi8> // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref> // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, strided<[?, 1], offset: ?>> -// CHECK: linalg.fill ins({{.*}} : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins({{.*}} : complex) inits(%[[v0]] : memref>) // CHECK: memref.copy %[[s0]], %[[l0]] : memref, strided{{.*}}> to memref, strided{{.*}}> -// CHECK: linalg.fill ins(%[[cc]] : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins(%[[cc]] : complex) inits(%[[v0]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir @@ -17,11 +17,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -30,7 +30,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 @@ -74,11 +74,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -87,7 +87,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -5,7 +5,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ 
-33,12 +33,12 @@ // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) { // CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]] // CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor // CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor -// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) outs(%[[EXT]] : tensor) { +// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) inits(%[[EXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -48,7 +48,7 @@ // CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor // CHECK: scf.yield %[[INS]] : tensor // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -61,7 +61,7 @@ affine_map<(d0, d1) -> (d1)>], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %42 = arith.addf %arg7, %arg9 : f32 linalg.yield %42 : f32 @@ -97,7 +97,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -126,7 +126,7 @@ // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.forall (%[[IV:.+]]) in (5) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) { // CHECK-DAG: %[[TS0:.+]] = affine.min #[[MAP0]](%[[IV]])[%[[D1]]] // CHECK-DAG: %[[TS1:.+]] = affine.max #[[MAP1]](%[[TS0]]) @@ -134,7 +134,7 @@ // CHECK: %[[TINDEX:.+]] = affine.apply #[[MAP2]](%[[IV]])[%[[D1]]] // CHECK: %[[INCHUNK:.+]] = tensor.extract_slice %[[ARG0]][0, %[[TINDEX]]] [%[[D0]], %[[TS1]]] [1, 1] : tensor to tensor // CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ET]][0] [%[[D0]]] [1] : tensor to tensor -// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) outs(%[[TEMPEXT]] : tensor) { +// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) 
inits(%[[TEMPEXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -143,7 +143,7 @@ // CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor into tensor // CHECK: } // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -154,7 +154,7 @@ func.func @matmul_tile_parallel( %A: tensor, %B: tensor, %out: tensor) -> tensor { %matmul = linalg.matmul ins(%A, %B: tensor, tensor) - outs(%out: tensor) -> tensor + inits(%out: tensor) -> tensor return %matmul : tensor } @@ -180,7 +180,7 @@ // CHECK-DAG: %[[D3:.*]] = tensor.dim %[[ARG2]], %[[C0]] : tensor // CHECK-DAG: %[[D4:.*]] = tensor.dim %[[ARG2]], %[[C1]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D3]], %[[D4]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.forall (%[[IV:.+]]) in (5) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) { // CHECK-DAG: %[[TS0:.+]] = affine.min #[[MAP0]](%[[IV]])[%[[D1]]] // CHECK-DAG: %[[TS1:.+]] = affine.max #[[MAP1]](%[[TS0]]) @@ -189,12 +189,12 @@ // CHECK: %[[INCHUNKA:.+]] = tensor.extract_slice %[[ARG0]][0, %[[TINDEX]]] [%[[D0]], %[[TS1]]] [1, 1] : tensor to tensor // CHECK: %[[INCHUNKB:.+]] = tensor.extract_slice %[[ARG1]][%[[TINDEX]], 0] [%[[TS1]], %[[D2]]] [1, 1] : tensor to tensor // CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ET]][0, 0] [%[[D0]], %[[D2]]] [1, 1] : tensor to tensor -// CHECK: %[[PARTIAL:.+]] = linalg.matmul ins(%[[INCHUNKA]], %[[INCHUNKB]] : tensor, tensor) outs(%[[TEMPEXT]] : tensor) -> tensor +// CHECK: %[[PARTIAL:.+]] = linalg.matmul ins(%[[INCHUNKA]], %[[INCHUNKB]] : tensor, tensor) inits(%[[TEMPEXT]] : tensor) -> tensor // CHECK: scf.forall.in_parallel { // CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, 0, %[[IV]]] [%[[D0]], %[[D2]], 1] [1, 1, 1] : tensor into tensor // CHECK: } // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG2]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG2]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -208,7 +208,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -237,7 +237,7 @@ // CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.forall (%[[IV:.+]]) in (5) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) { // CHECK: %[[ET:.+]] = 
tensor.extract_slice %[[ARG3:.+]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor to tensor // CHECK: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor @@ -247,7 +247,7 @@ // CHECK: %[[D3:.+]] = tensor.dim %[[ACC]], %[[C0]] : tensor // CHECK: %[[INCHUNK:.+]] = tensor.extract_slice %[[ARG0]][0, %[[IV1]]] [%[[D0]], %[[TS0]]] [1, 1] : tensor to tensor // CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ACC]][0] [%[[D3]]] [1] : tensor to tensor -// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) outs(%[[TEMPEXT]] : tensor) { +// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) inits(%[[TEMPEXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -259,7 +259,7 @@ // CHECK: tensor.parallel_insert_slice %[[CARRY]] into %[[ARG3]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor into tensor // CHECK: } // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -273,7 +273,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -310,7 +310,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -337,7 +337,7 @@ func.func @fail_for_float_neutral(%arg0: tensor, %arg1: tensor) -> tensor { // expected-error @below {{'linalg.generic' op Failed to get an identity value for the reduction operation.}} // expected-note @below {{when applied to this op}} - %0 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) outs(%arg1 : tensor) { + %0 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%in: f32, %out: f32): %1 = llvm.fmul %in, %in : f32 %2 = llvm.fadd %1, %out : f32 diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -6,7 +6,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [0] : vector<1584xf32> to f32 linalg.dot ins(%A, %B: memref<1584xf32>, memref<1584xf32>) - outs(%C: memref) + inits(%C: memref) return } @@ -25,7 +25,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [1] : vector<1584x1584xf32> to vector<1584xf32> linalg.matvec ins(%A, %B: memref<1584x1584xf32>, memref<1584xf32>) - outs(%C: memref<1584xf32>) + inits(%C: memref<1584xf32>) return } @@ -43,7 +43,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<1584x1584x1584xf32> to 
vector<1584x1584xf32> linalg.matmul ins(%A, %B: memref<1584x1584xf32>, memref<1584x1584xf32>) - outs(%C: memref<1584x1584xf32>) + inits(%C: memref<1584x1584xf32>) return } @@ -62,7 +62,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [3] : vector<1584x1584x1584x1584xf32> to vector<1584x1584x1584xf32> linalg.batch_matmul ins(%A, %B: memref<1584x1584x1584xf32>, memref<1584x1584x1584xf32>) - outs(%C: memref<1584x1584x1584xf32>) + inits(%C: memref<1584x1584x1584xf32>) return } @@ -97,7 +97,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<8x32xf32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<8x32xf32>) { + inits(%C : memref<8x32xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -137,7 +137,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<32x8xf32> linalg.generic #matmul_transpose_out_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<32x8xf32>) { + inits(%C : memref<32x8xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -166,7 +166,7 @@ %1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<12x128x32xf32>) - outs(%0 : tensor<128x12x32xf32>) { + inits(%0 : tensor<128x12x32xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<128x12x32xf32> @@ -204,7 +204,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xi32>, memref<8x32xi32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xi32>, memref<16x32xi32>) - outs(%C : memref<8x32xi32>) { + inits(%C : memref<8x32xi32>) { ^bb(%a: i32, %b: i32, %c: i32) : %d = arith.muli %a, %b: i32 %e = arith.addi %c, %d: i32 @@ -229,7 +229,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32> linalg.matmul ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>) - outs(%C: memref<8x32xf32>) + inits(%C: memref<8x32xf32>) return } @@ -250,7 +250,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : f32) - outs(%A: memref<8x16xf32>) { + inits(%A: memref<8x16xf32>) { ^bb(%0: f32, %1: f32) : linalg.yield %0 : f32 } @@ -274,7 +274,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : complex) - outs(%A: memref<8x16xcomplex>) { + inits(%A: memref<8x16xcomplex>) { ^bb(%0: complex, %1: complex) : linalg.yield %0 : complex } @@ -297,7 +297,7 @@ %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} ins(%arg0 : tensor<5xf32>) - outs(%0 : tensor<5xi32>) { + inits(%0 : tensor<5xi32>) { ^bb0(%arg1: f32, %arg2: i32): %2 = linalg.index 0 : index %11 = affine.apply affine_map<() -> (123)>() @@ -339,7 +339,7 @@ func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) + linalg.fill ins(%arg0 : f32) inits(%A : memref<8x16xf32>) return } @@ -357,7 +357,7 @@ // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32) // CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector // CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref - linalg.fill ins(%arg0 : f32) outs(%A : 
memref) + linalg.fill ins(%arg0 : f32) inits(%A : memref) return } @@ -432,7 +432,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<8xindex> to vector<1x2x4x8xindex> // CHECK: vector.transfer_write %[[BCST]], %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {{.*}} : vector<1x2x4x8xindex>, memref<1x2x4x8xindex> @@ -460,7 +460,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<2xindex> to vector<1x8x4x2xindex> // CHECK: %[[TRAN:.*]] = vector.transpose %[[BCST]], [0, 3, 2, 1] : vector<1x8x4x2xindex> to vector<1x2x4x8xindex> @@ -508,7 +508,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: memref<4x256xf32>, memref<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, @@ -590,7 +590,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: tensor<4x256xf32>, tensor<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, @@ -681,7 +681,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>) - outs(%C : memref<4x4x4x4xf32>) { + inits(%C : memref<4x4x4x4xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg0, %arg1 : f32 %a = arith.addf %arg2, %s : f32 @@ -725,7 +725,7 @@ %C: memref<16x14x7x8xf32>, %D: memref<7x14x8x16xf32>) { linalg.generic #matmul_trait ins(%A, %B, %C : memref<14x7xf32>, memref<16x14xf32>, memref<16x14x7x8xf32>) - outs(%D : memref<7x14x8x16xf32>) { + inits(%D : memref<7x14x8x16xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %d: f32) : %e = arith.addf %a, %b: f32 %f = arith.addf %e, %c: f32 @@ -760,7 +760,7 @@ // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) - outs(%arg2: tensor<8x12xf32>) + inits(%arg2: tensor<8x12xf32>) -> tensor<8x12xf32> // CHECK: return %[[W]] : tensor<8x12xf32> return %0 : tensor<8x12xf32> @@ -847,7 +847,7 @@ // CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index // CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) inits(%[[INIT]] : tensor<6x?x?x?xf32>) -> 
tensor<6x?x?x?xf32> // CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> // CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> // CHECK: return %[[RESULT]] @@ -1105,7 +1105,7 @@ affine_map<(d0, d1, d2) -> (d0, d1)> ], iterator_types = ["parallel", "parallel", "reduction"] - } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) { + } ins(%input : tensor<4x16x8xf32>) inits(%output : tensor<4x16xf32>) { ^bb0(%arg0: f32, %arg1: f32): %1 = math.exp %arg0 : f32 %2 = arith.addf %1, %arg1 : f32 @@ -1148,7 +1148,7 @@ affine_map<(d0, d1, d2, d3) -> (d3, d0)> ], iterator_types = ["parallel", "reduction", "reduction", "parallel"] - } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) { + } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) inits(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %1 = math.exp %arg0 : f32 %2 = math.exp %arg1 : f32 @@ -1177,11 +1177,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant -3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %max = arith.maxf %in0, %out0 : f32 linalg.yield %max : f32 @@ -1208,11 +1208,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %maxf32 = arith.constant 3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%maxf32 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %min = arith.minf %out0, %in0 : f32 linalg.yield %min : f32 @@ -1238,11 +1238,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant 1.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %out0 : f32 linalg.yield %mul : f32 @@ -1268,11 +1268,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = 
[affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %or = arith.ori %in0, %out0 : i1 linalg.yield %or : i1 @@ -1298,11 +1298,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant true %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %and = arith.andi %in0, %out0 : i1 linalg.yield %and : i1 @@ -1328,11 +1328,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %xor = arith.xori %in0, %out0 : i1 linalg.yield %xor : i1 @@ -1360,13 +1360,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4x4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4x4xf32>) -> tensor<4x4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4x4xf32>) { + inits(%fill : tensor<4x4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 linalg.yield %40 : f32 @@ -1396,13 +1396,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4xf32>) { + inits(%fill : tensor<4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 %41 = math.exp %40 : f32 @@ -1437,7 +1437,7 @@ // CHECK: %[[init:.*]] = tensor.empty() : tensor %0 = tensor.empty() : tensor - %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%f0 : f32) inits(%0 : tensor) -> tensor // CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]] // CHECK-SAME: : tensor<32xf32>, vector<32xf32> // CHECK: %[[f0:.*]] = 
vector.extractelement %[[vF0]][] : vector @@ -1451,7 +1451,7 @@ affine_map<(d0) -> ()>], iterator_types = ["reduction"]} ins(%arg0 : tensor<32xf32>) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%a: f32, %b: f32): %3 = arith.addf %a, %b : f32 linalg.yield %3 : f32 @@ -1478,13 +1478,13 @@ func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> { %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<6x6x3x3xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> // CHECK: linalg.generic %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<8x8xf32>) - outs(%fill : tensor<6x6x3x3xf32>) { + inits(%fill : tensor<6x6x3x3xf32>) { ^bb0(%arg7: f32, %arg9: f32): linalg.yield %arg7 : f32 } -> tensor<6x6x3x3xf32> @@ -1509,7 +1509,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2x4x8xf32>, tensor<2x4xf32>) - outs(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { + inits(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32): %1 = arith.mulf %b0, %b1 : f32 %2 = arith.addf %1, %b3 : f32 @@ -1545,7 +1545,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1 : tensor<4x3xi32>) outs(%arg2 : tensor<4x7x3x2xf32>) { + } ins(%arg1 : tensor<4x3xi32>) inits(%arg2 : tensor<4x7x3x2xf32>) { ^bb0(%arg3: i32, %arg4: f32): %2 = arith.index_cast %arg3 : i32 to index %3 = tensor.extract %arg0[%2] : tensor<3xf32> @@ -1582,7 +1582,7 @@ %2 = linalg.generic { indexing_maps = [#map1], iterator_types = ["parallel", "parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { + } inits(%arg2 : tensor<1x1x3xf32>) { ^bb0(%arg4: f32): %7 = tensor.extract %arg0[%c0, %c1] : tensor<3x3xf32> linalg.yield %7 : f32 @@ -1616,7 +1616,7 @@ %1 = linalg.generic { indexing_maps = [#map1], iterator_types = ["parallel", "parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { + } inits(%arg2 : tensor<1x1x3xf32>) { ^bb0(%arg4: f32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -1655,7 +1655,7 @@ %25 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %26 = linalg.index 0 : index %27 = arith.addi %arg0, %26 : index @@ -1707,7 +1707,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg4 : tensor<4x7x3x2xf32>) { + } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg4 : tensor<4x7x3x2xf32>) { ^bb0(%arg5: i32, %arg6: i32, %arg7: f32, %arg8: f32): %3 = arith.index_cast %arg5 : i32 to index %4 = arith.index_cast %arg6 : i32 to index @@ -1752,7 +1752,7 @@ %c5 = arith.constant 5 : index %c0 = arith.constant 0 : index %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%0 : 
tensor<5xf32>) { + %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} inits(%0 : tensor<5xf32>) { ^bb0(%out: f32): %2 = linalg.index 0 : index %extracted = tensor.extract %arg1[%2] : tensor<5xi32> @@ -1798,7 +1798,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 1 : index %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0) @@ -1840,7 +1840,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 1 : index %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0) @@ -1884,7 +1884,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 1 : index %3 = arith.maxsi %2, %c79 : index @@ -1924,7 +1924,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -1962,7 +1962,7 @@ #map = affine_map<(d0) -> (d0)> func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: tensor<5xindex>) -> tensor<5xf32> { %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) outs(%0 : tensor<5xf32>) { + %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) inits(%0 : tensor<5xf32>) { ^bb0(%in: index, %out: f32): %2 = linalg.index 0 : index %extracted_0 = tensor.extract %arg0[%in, %2] : tensor<5x6xf32> @@ -2001,7 +2001,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>, %arg1: memref<64xf32>, %arg2: memref<64xf32>) { linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) - outs(%arg2 : memref<64xf32>) + inits(%arg2 : memref<64xf32>) (%in: f32, %in_0: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 @@ -2025,7 +2025,7 @@ func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, %arg1: memref<32x64x16xf32>) { linalg.transpose ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] + inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] return } // CHECK-LABEL: func @vectorize_transpose @@ -2044,7 +2044,7 @@ func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, %arg1: memref<16x64xf32>) { linalg.reduce ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<16x64xf32>) dimensions = [1] + inits(%arg1 : memref<16x64xf32>) dimensions = [1] (%in: f32, %init: f32) { %0 = arith.addf %in, %init : f32 linalg.yield %0 : f32 @@ -2072,7 +2072,7 @@ affine_map<(d0) -> (d0)>], iterator_types = ["parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb(%in0: f32, %in1: f32, %out: f32) : %0 = arith.addf %in0, %in1 : f32 linalg.yield %0 : f32 @@ -2106,7 +2106,7 @@ affine_map<(d0) -> (d0)>], iterator_types = ["parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { 
@@ -62,7 +62,7 @@
 // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [3] : vector<1584x1584x1584x1584xf32> to vector<1584x1584x1584xf32>
   linalg.batch_matmul
     ins(%A, %B: memref<1584x1584x1584xf32>, memref<1584x1584x1584xf32>)
-    outs(%C: memref<1584x1584x1584xf32>)
+    inits(%C: memref<1584x1584x1584xf32>)
   return
 }
@@ -97,7 +97,7 @@
 // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<8x32xf32>
   linalg.generic #matmul_trait
     ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>)
-    outs(%C : memref<8x32xf32>) {
+    inits(%C : memref<8x32xf32>) {
   ^bb(%a: f32, %b: f32, %c: f32) :
     %d = arith.mulf %a, %b: f32
     %e = arith.addf %c, %d: f32
@@ -137,7 +137,7 @@
 // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<32x8xf32>
   linalg.generic #matmul_transpose_out_trait
     ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>)
-    outs(%C : memref<32x8xf32>) {
+    inits(%C : memref<32x8xf32>) {
   ^bb(%a: f32, %b: f32, %c: f32) :
     %d = arith.mulf %a, %b: f32
     %e = arith.addf %c, %d: f32
@@ -166,7 +166,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
-    outs(%0 : tensor<128x12x32xf32>) {
+    inits(%0 : tensor<128x12x32xf32>) {
   ^bb0(%arg1: f32, %arg2: f32):
     linalg.yield %arg1 : f32
   } -> tensor<128x12x32xf32>
@@ -204,7 +204,7 @@
 // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xi32>, memref<8x32xi32>
   linalg.generic #matmul_trait
     ins(%A, %B : memref<8x16xi32>, memref<16x32xi32>)
-    outs(%C : memref<8x32xi32>) {
+    inits(%C : memref<8x32xi32>) {
   ^bb(%a: i32, %b: i32, %c: i32) :
     %d = arith.muli %a, %b: i32
     %e = arith.addi %c, %d: i32
@@ -229,7 +229,7 @@
 // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32>
   linalg.matmul
     ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>)
-    outs(%C: memref<8x32xf32>)
+    inits(%C: memref<8x32xf32>)
   return
 }
@@ -250,7 +250,7 @@
     indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>],
     iterator_types = ["parallel", "parallel"]}
    ins(%arg0 : f32)
-   outs(%A: memref<8x16xf32>) {
+   inits(%A: memref<8x16xf32>) {
   ^bb(%0: f32, %1: f32) :
     linalg.yield %0 : f32
   }
@@ -274,7 +274,7 @@
     indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>],
     iterator_types = ["parallel", "parallel"]}
    ins(%arg0 : complex)
-   outs(%A: memref<8x16xcomplex>) {
+   inits(%A: memref<8x16xcomplex>) {
   ^bb(%0: complex, %1: complex) :
     linalg.yield %0 : complex
   }
@@ -297,7 +297,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map0],
                        iterator_types = ["parallel"]}
     ins(%arg0 : tensor<5xf32>)
-    outs(%0 : tensor<5xi32>) {
+    inits(%0 : tensor<5xi32>) {
   ^bb0(%arg1: f32, %arg2: i32):
     %2 = linalg.index 0 : index
     %11 = affine.apply affine_map<() -> (123)>()
@@ -339,7 +339,7 @@
 func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) {
   // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32>
   // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32>
-  linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>)
+  linalg.fill ins(%arg0 : f32) inits(%A : memref<8x16xf32>)
   return
 }
@@ -357,7 +357,7 @@
 // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32)
 //      CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector
 //      CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref
-  linalg.fill ins(%arg0 : f32) outs(%A : memref)
+  linalg.fill ins(%arg0 : f32) inits(%A : memref)
   return
 }
@@ -432,7 +432,7 @@
     indexing_maps = [
       affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-  outs(%arg0: memref<1x2x4x8xindex>) {
+  inits(%arg0: memref<1x2x4x8xindex>) {
   ^bb0(%arg1: index):
   // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<8xindex> to vector<1x2x4x8xindex>
   // CHECK: vector.transfer_write %[[BCST]], %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {{.*}} : vector<1x2x4x8xindex>, memref<1x2x4x8xindex>
@@ -460,7 +460,7 @@
     indexing_maps = [
      affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-  outs(%arg0: memref<1x2x4x8xindex>) {
+  inits(%arg0: memref<1x2x4x8xindex>) {
   ^bb0(%arg1: index):
   // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<2xindex> to vector<1x8x4x2xindex>
  // CHECK: %[[TRAN:.*]] = vector.transpose %[[BCST]], [0, 3, 2, 1] : vector<1x8x4x2xindex> to vector<1x2x4x8xindex>
@@ -508,7 +508,7 @@
       affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
     ins(%arg1, %arg2: memref<4x256xf32>, memref<256xf32>)
-    outs(
+    inits(
       %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 :
       memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>,
       memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>,
@@ -590,7 +590,7 @@
       affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
     ins(%arg1, %arg2: tensor<4x256xf32>, tensor<256xf32>)
-    outs(
+    inits(
       %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 :
       tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>,
       tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>,
@@ -681,7 +681,7 @@
       affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
     ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>)
-    outs(%C : memref<4x4x4x4xf32>) {
+    inits(%C : memref<4x4x4x4xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
     %s = arith.subf %arg0, %arg1 : f32
     %a = arith.addf %arg2, %s : f32
@@ -725,7 +725,7 @@
   %C: memref<16x14x7x8xf32>, %D: memref<7x14x8x16xf32>) {
   linalg.generic #matmul_trait
     ins(%A, %B, %C : memref<14x7xf32>, memref<16x14xf32>, memref<16x14x7x8xf32>)
-    outs(%D : memref<7x14x8x16xf32>) {
+    inits(%D : memref<7x14x8x16xf32>) {
   ^bb(%a: f32, %b: f32, %c: f32, %d: f32) :
     %e = arith.addf %a, %b: f32
     %f = arith.addf %e, %c: f32
@@ -760,7 +760,7 @@
 // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32>
 // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32>
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>)
-                     outs(%arg2: tensor<8x12xf32>)
+                     inits(%arg2: tensor<8x12xf32>)
     -> tensor<8x12xf32>
   // CHECK: return %[[W]] : tensor<8x12xf32>
   return %0 : tensor<8x12xf32>
@@ -847,7 +847,7 @@
 //      CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index
 //      CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index
 //      CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32>
-//      CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
+//      CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) inits(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
 //      CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
 //      CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
 //      CHECK: return %[[RESULT]]
@@ -1105,7 +1105,7 @@
       affine_map<(d0, d1, d2) -> (d0, d1)>
     ],
     iterator_types = ["parallel", "parallel", "reduction"]
-  } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) {
+  } ins(%input : tensor<4x16x8xf32>) inits(%output : tensor<4x16xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):
     %1 = math.exp %arg0 : f32
     %2 = arith.addf %1, %arg1 : f32
@@ -1148,7 +1148,7 @@
       affine_map<(d0, d1, d2, d3) -> (d3, d0)>
     ],
     iterator_types = ["parallel", "reduction", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %1 = math.exp %arg0 : f32
     %2 = math.exp %arg1 : f32
@@ -1177,11 +1177,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %ident = arith.constant -3.40282e+38 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+                        ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) {
   ^bb0(%in0: f32, %out0: f32):
     %max = arith.maxf %in0, %out0 : f32
     linalg.yield %max : f32
@@ -1208,11 +1208,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %maxf32 = arith.constant 3.40282e+38 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%maxf32 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+                        ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) {
   ^bb0(%in0: f32, %out0: f32):
     %min = arith.minf %out0, %in0 : f32
     linalg.yield %min : f32
@@ -1238,11 +1238,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %ident = arith.constant 1.0 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+                        ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) {
   ^bb0(%in0: f32, %out0: f32):
     %mul = arith.mulf %in0, %out0 : f32
     linalg.yield %mul : f32
@@ -1268,11 +1268,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant false
   %init = tensor.empty() : tensor<4xi1>
-  %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+  %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+                        ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) {
   ^bb0(%in0: i1, %out0: i1):
     %or = arith.ori %in0, %out0 : i1
     linalg.yield %or : i1
@@ -1298,11 +1298,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant true
   %init = tensor.empty() : tensor<4xi1>
-  %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+  %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+                        ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) {
   ^bb0(%in0: i1, %out0: i1):
     %and = arith.andi %in0, %out0 : i1
     linalg.yield %and : i1
@@ -1328,11 +1328,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant false
   %init = tensor.empty() : tensor<4xi1>
-  %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+  %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+                        ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) {
   ^bb0(%in0: i1, %out0: i1):
     %xor = arith.xori %in0, %out0 : i1
     linalg.yield %xor : i1
@@ -1360,13 +1360,13 @@
 // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
   %c0 = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<4x4xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0, 0)>,
                                           affine_map<(d0, d1) -> (d0, d1)>],
                          iterator_types = ["parallel", "parallel"]}
                         ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>)
-                        outs(%fill : tensor<4x4xf32>) {
+                        inits(%fill : tensor<4x4xf32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     linalg.yield %40 : f32
@@ -1396,13 +1396,13 @@
 // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
   %c0 = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0, 0)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
                         ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>)
-                        outs(%fill : tensor<4xf32>) {
+                        inits(%fill : tensor<4xf32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -1437,7 +1437,7 @@
   //      CHECK: %[[init:.*]] = tensor.empty() : tensor
   %0 = tensor.empty() : tensor
-  %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%f0 : f32) inits(%0 : tensor) -> tensor
   //      CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
   // CHECK-SAME:   : tensor<32xf32>, vector<32xf32>
   //      CHECK: %[[f0:.*]] = vector.extractelement %[[vF0]][] : vector
@@ -1451,7 +1451,7 @@
                                          affine_map<(d0) -> ()>],
          iterator_types = ["reduction"]}
          ins(%arg0 : tensor<32xf32>)
-         outs(%1 : tensor) {
+         inits(%1 : tensor) {
     ^bb0(%a: f32, %b: f32):
       %3 = arith.addf %a, %b : f32
       linalg.yield %3 : f32
@@ -1478,13 +1478,13 @@
 func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> {
   %c0 = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<6x6x3x3xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
+  %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
   // CHECK: linalg.generic
   %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>,
                                              affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
                             iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<8x8xf32>)
-    outs(%fill : tensor<6x6x3x3xf32>) {
+    inits(%fill : tensor<6x6x3x3xf32>) {
   ^bb0(%arg7: f32, %arg9: f32):
     linalg.yield %arg7 : f32
   } -> tensor<6x6x3x3xf32>
@@ -1509,7 +1509,7 @@
       affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel", "reduction"]}
     ins(%arg0, %arg1 : tensor<2x4x8xf32>, tensor<2x4xf32>)
-    outs(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) {
+    inits(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) {
   ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
     %1 = arith.mulf %b0, %b1 : f32
     %2 = arith.addf %1, %b3 : f32
@@ -1545,7 +1545,7 @@
   %1 = linalg.generic {
     indexing_maps = [#map0, #map1],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-  } ins(%arg1 : tensor<4x3xi32>) outs(%arg2 : tensor<4x7x3x2xf32>) {
+  } ins(%arg1 : tensor<4x3xi32>) inits(%arg2 : tensor<4x7x3x2xf32>) {
   ^bb0(%arg3: i32, %arg4: f32):
     %2 = arith.index_cast %arg3 : i32 to index
    %3 = tensor.extract %arg0[%2] : tensor<3xf32>
@@ -1582,7 +1582,7 @@
   %2 = linalg.generic {
     indexing_maps = [#map1],
     iterator_types = ["parallel", "parallel", "parallel"]
-  } outs(%arg2 : tensor<1x1x3xf32>) {
+  } inits(%arg2 : tensor<1x1x3xf32>) {
   ^bb0(%arg4: f32):
     %7 = tensor.extract %arg0[%c0, %c1] : tensor<3x3xf32>
     linalg.yield %7 : f32
@@ -1616,7 +1616,7 @@
   %1 = linalg.generic {
     indexing_maps = [#map1],
     iterator_types = ["parallel", "parallel", "parallel"]
-  } outs(%arg2 : tensor<1x1x3xf32>) {
+  } inits(%arg2 : tensor<1x1x3xf32>) {
   ^bb0(%arg4: f32):
     %2 = linalg.index 0 : index
     %3 = linalg.index 1 : index
@@ -1655,7 +1655,7 @@
   %25 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %26 = linalg.index 0 : index
     %27 = arith.addi %arg0, %26 : index
@@ -1707,7 +1707,7 @@
   %2 = linalg.generic {
     indexing_maps = [#map0, #map0, #map1, #map2],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-  } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg4 : tensor<4x7x3x2xf32>) {
+  } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg4 : tensor<4x7x3x2xf32>) {
   ^bb0(%arg5: i32, %arg6: i32, %arg7: f32, %arg8: f32):
     %3 = arith.index_cast %arg5 : i32 to index
     %4 = arith.index_cast %arg6 : i32 to index
@@ -1752,7 +1752,7 @@
   %c5 = arith.constant 5 : index
   %c0 = arith.constant 0 : index
   %0 = tensor.empty() : tensor<5xf32>
-  %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%0 : tensor<5xf32>) {
+  %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} inits(%0 : tensor<5xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 0 : index
     %extracted = tensor.extract %arg1[%2] : tensor<5xi32>
@@ -1798,7 +1798,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 1 : index
     %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0)
@@ -1840,7 +1840,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 1 : index
     %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0)
@@ -1884,7 +1884,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 1 : index
     %3 = arith.maxsi %2, %c79 : index
@@ -1924,7 +1924,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 0 : index
     %3 = linalg.index 1 : index
@@ -1962,7 +1962,7 @@
 #map = affine_map<(d0) -> (d0)>
 func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: tensor<5xindex>) -> tensor<5xf32> {
   %0 = tensor.empty() : tensor<5xf32>
-  %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) outs(%0 : tensor<5xf32>) {
+  %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) inits(%0 : tensor<5xf32>) {
   ^bb0(%in: index, %out: f32):
     %2 = linalg.index 0 : index
     %extracted_0 = tensor.extract %arg0[%in, %2] : tensor<5x6xf32>
@@ -2001,7 +2001,7 @@
 func.func @vectorize_map(%arg0: memref<64xf32>,
     %arg1: memref<64xf32>, %arg2: memref<64xf32>) {
   linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>)
-             outs(%arg2 : memref<64xf32>)
+             inits(%arg2 : memref<64xf32>)
     (%in: f32, %in_0: f32) {
       %0 = arith.addf %in, %in_0 : f32
       linalg.yield %0 : f32
@@ -2025,7 +2025,7 @@
 func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>,
                                %arg1: memref<32x64x16xf32>) {
   linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
-                   outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+                   inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
   return
 }
 // CHECK-LABEL: func @vectorize_transpose
@@ -2044,7 +2044,7 @@
 func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>,
                             %arg1: memref<16x64xf32>) {
   linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
-                outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+                inits(%arg1 : memref<16x64xf32>) dimensions = [1]
     (%in: f32, %init: f32) {
       %0 = arith.addf %in, %init : f32
       linalg.yield %0 : f32
@@ -2072,7 +2072,7 @@
                      affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2106,7 +2106,7 @@
                      affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2140,7 +2140,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2178,7 +2178,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2213,7 +2213,7 @@
                      affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]
   } ins(%arg0 : tensor)
-    outs(%arg1 : tensor) {
+    inits(%arg1 : tensor) {
   ^bb(%in: f32, %out: f32) :
     %0 = arith.addf %in, %out : f32
     linalg.yield %0 : f32
@@ -2251,7 +2251,7 @@
                      affine_map<(d0, d1, d2) -> (d2, d1)>],
    iterator_types = ["reduction", "parallel", "parallel"]
   } ins(%arg0 : tensor)
-    outs(%arg1 : tensor) {
+    inits(%arg1 : tensor) {
   ^bb(%in: f32, %out: f32) :
     %0 = arith.addf %in, %out : f32
     linalg.yield %0 : f32
@@ -2295,7 +2295,7 @@
     %expanded = tensor.expand_shape %extracted_slice [[0, 1]] : tensor into tensor<1x?xf32>
     %extracted_slice_0 = tensor.extract_slice %arg0[0, %arg3] [1, %arg2] [1, 1] : tensor<1x?xf32> to tensor
     %extracted_slice_1 = tensor.extract_slice %expanded[0, %arg3] [1, %arg2] [1, 1] : tensor<1x?xf32> to tensor
-    %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%extracted_slice_0 : tensor) outs(%extracted_slice_1 : tensor) {
+    %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%extracted_slice_0 : tensor) inits(%extracted_slice_1 : tensor) {
    ^bb0(%in: f32, %out: f32):
       %3 = arith.addf %in, %out : f32
       linalg.yield %3 : f32
@@ -2325,8 +2325,8 @@
   %cst_6 = arith.constant 4.000000e+00 : f32
   %1 = scf.for %arg0 = %c0 to %c64 step %c4 iter_args(%arg1 = %input) -> (tensor<120x64xf32>) {
     %extracted_slice = tensor.extract_slice %arg1[%c0, %arg0] [1, 4] [1, 1] : tensor<120x64xf32> to tensor<1x4xf32>
-    %10 = linalg.fill {__internal_linalg_transform__ = "1"} ins(%cst_6 : f32) outs(%extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32>
-    %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%10 : tensor<1x4xf32>) {
+    %10 = linalg.fill {__internal_linalg_transform__ = "1"} ins(%cst_6 : f32) inits(%extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32>
+    %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} inits(%10 : tensor<1x4xf32>) {
     ^bb0(%out: f32):
       %12 = linalg.index 0 : index
       %13 = arith.addi %arg0, %12 : index
@@ -2362,7 +2362,7 @@
   %0 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> ()>],
                        iterator_types = ["reduction"]}
-    ins(%arg0 : tensor<0xf32>) outs(%arg1 : tensor) {
+    ins(%arg0 : tensor<0xf32>) inits(%arg1 : tensor) {
   ^bb0(%in: f32, %out: f32):
     %12 = arith.addf %out, %in : f32
     linalg.yield %12 : f32
@@ -2387,7 +2387,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor<8x?xf32>, tensor<8x?xf32>)
-    outs(%arg2 : tensor<8x?xf32>) {
+    inits(%arg2 : tensor<8x?xf32>) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2428,7 +2428,7 @@
   %2 = linalg.generic {
     indexing_maps = [#map1],
    iterator_types = ["parallel", "parallel"]
-  } outs(%arg1 : tensor) {
+  } inits(%arg1 : tensor) {
   ^bb0(%arg3: f32):
     %7 = tensor.extract %arg0[%c0, %c1] : tensor
     linalg.yield %7 : f32
@@ -2479,7 +2479,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor<8x32xf32>, tensor<8x32xf32>)
-    outs(%arg2 : tensor<8x32xf32>) {
+    inits(%arg2 : tensor<8x32xf32>) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2506,7 +2506,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor<8x30xf32>, tensor<8x30xf32>)
-    outs(%arg2 : tensor<8x30xf32>) {
+    inits(%arg2 : tensor<8x30xf32>) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
--- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
@@ -4,7 +4,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<1x3x8xf32>)
-    outs(%output : memref<4x2x8xf32>)
+    inits(%output : memref<4x2x8xf32>)
   return
 }
@@ -67,7 +67,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xi8>, memref<1x3x8xi8>)
-    outs(%output : memref<4x2x8xi32>)
+    inits(%output : memref<4x2x8xi32>)
   return
 }
@@ -129,7 +129,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>)
-    outs(%output : memref<4x2x8xf32>)
+    inits(%output : memref<4x2x8xf32>)
   return
 }
@@ -206,7 +206,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>)
-    outs(%output : memref<4x2x8xf32>)
+    inits(%output : memref<4x2x8xf32>)
   return
 }
@@ -255,7 +255,7 @@
   linalg.conv_1d_ncw_fcw
    {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
    ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x1xf32>)
-   outs(%output : memref<4x8x2xf32>)
+   inits(%output : memref<4x8x2xf32>)
   return
 }
@@ -324,7 +324,7 @@
   linalg.conv_1d_ncw_fcw
    {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
    ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>)
-   outs(%output : memref<4x8x2xf32>)
+   inits(%output : memref<4x8x2xf32>)
   return
 }
@@ -409,7 +409,7 @@
   linalg.conv_1d_ncw_fcw
    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
    ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>)
-   outs(%output : memref<4x8x2xf32>)
+   inits(%output : memref<4x8x2xf32>)
   return
 }
@@ -465,7 +465,7 @@
 func.func @conv1d_8_tensor(%input: tensor<11xf32>, %filter: tensor<4xf32>, %output: tensor<8xf32>) -> tensor<8xf32> {
   %0 = linalg.conv_1d ins(%input, %filter : tensor<11xf32>, tensor<4xf32>)
-                      outs(%output : tensor<8xf32>) -> tensor<8xf32>
+                      inits(%output : tensor<8xf32>) -> tensor<8xf32>
   return %0 : tensor<8xf32>
 }
@@ -520,7 +520,7 @@
   linalg.depthwise_conv_1d_nwc_wc
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<3x5x4xf32>, memref<2x4xf32>)
-    outs(%output : memref<3x2x4xf32>)
+    inits(%output : memref<3x2x4xf32>)
   return
 }
@@ -561,7 +561,7 @@
   linalg.depthwise_conv_1d_nwc_wc
    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
    ins(%input, %filter : memref<3x5x4xi8>, memref<2x4xi8>)
-   outs(%output : memref<3x2x4xi32>)
+   inits(%output : memref<3x2x4xi32>)
   return
 }
@@ -606,7 +606,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>}
     ins(%input, %filter : memref<1x2x3xf16>, memref<1x3x2xf16>)
-    outs(%output : memref<1x2x2xf32>)
+    inits(%output : memref<1x2x2xf32>)
   return
 }
@@ -631,7 +631,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xf32>, memref<1xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -657,7 +657,7 @@
   linalg.pooling_nwc_max
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xf32>, memref<1xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -685,7 +685,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xi8>, memref<1xi8>)
-    outs(%output : memref<4x2x3xi32>)
+    inits(%output : memref<4x2x3xi32>)
   return
 }
@@ -717,7 +717,7 @@
   linalg.pooling_nwc_max
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xi8>, memref<1xi8>)
-    outs(%output : memref<4x2x3xi32>)
+    inits(%output : memref<4x2x3xi32>)
   return
 }
@@ -747,7 +747,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<2xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -778,7 +778,7 @@
   linalg.pooling_ncw_sum
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x3x4xf32>, memref<1xf32>)
-    outs(%output : memref<4x3x2xf32>)
+    inits(%output : memref<4x3x2xf32>)
   return
 }
@@ -808,7 +808,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>}
     ins(%input, %filter : memref<1x2x3xf16>, memref<1xf16>)
-    outs(%output : memref<1x2x3xf32>)
+    inits(%output : memref<1x2x3xf32>)
   return
 }
@@ -829,7 +829,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xf32>, memref<2xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -852,7 +852,7 @@
   linalg.pooling_ncw_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x3x6xf32>, memref<2xf32>)
-    outs(%output : memref<4x3x2xf32>)
+    inits(%output : memref<4x3x2xf32>)
   return
 }
@@ -885,7 +885,7 @@
   linalg.pooling_ncw_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<4x2x5xf32>, memref<2xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
diff --git a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir
--- a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir
+++ b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir
@@ -5,9 +5,9 @@
   %c2 = arith.constant 2 : index
   %cst_0 = arith.constant -0.000000e+00 : f32
   %0 = memref.alloc() : memref<128x384xf32>
-  linalg.fill ins(%cst_0 : f32) outs(%0 : memref<128x384xf32>)
+  linalg.fill ins(%cst_0 : f32) inits(%0 : memref<128x384xf32>)
   %2 = memref.alloc() : memref<128xf32>
-  linalg.fill ins(%cst_0 : f32) outs(%2 : memref<128xf32>)
+  linalg.fill ins(%cst_0 : f32) inits(%2 : memref<128xf32>)
   scf.forall (%arg0) in (%c2) {
     %7 = affine.min affine_map<(d0) -> (d0 * -64 + 128, 64)>(%arg0)
     %8 = affine.max affine_map<(d0) -> (0, d0)>(%7)
@@ -21,12 +21,12 @@
     %12 = memref.subview %2[%9] [%10] [1] : memref<128xf32> to memref (d0 + s0)>>
-    // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) outs(%{{.*}} : memref)
+    // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) inits(%{{.*}} : memref)
     linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
                     iterator_types = ["parallel", "reduction"]}
                     ins(%11 : memref (d0 * 384 + s0 + d1)>>)
-                    outs(%12 : memref (d0 + s0)>>) {
+                    inits(%12 : memref (d0 + s0)>>) {
     ^bb0(%arg1: f32, %arg2: f32):
       %14 = arith.addf %arg1, %arg2 : f32
       linalg.yield %14 : f32
diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir
--- a/mlir/test/Dialect/SCF/loop-pipelining.mlir
+++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir
@@ -566,7 +566,7 @@
 // CHECK-SAME:     iter_args(%[[IA:.+]] = %[[PAV0]], %[[IB:.+]] = %[[PBV0:.+]])
 //      CHECK:   %[[CV:.+]] = memref.subview %[[ARG2]]
 //      CHECK:   linalg.generic
-// CHECK-SAME:     ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) outs(%[[CV]] :
+// CHECK-SAME:     ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) inits(%[[CV]] :
 //      CHECK:   %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]]
 //      CHECK:   %[[ASV:.+]] = memref.subview %[[ARG0]][%[[NEXT]]] [8] [1] :
 //      CHECK:   %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] :
@@ -581,7 +581,7 @@
 //      CHECK: }
 //      CHECK: %[[CV:.+]] = memref.subview %[[ARG2]][%[[C3]]] [8] [1] :
 //      CHECK: linalg.generic
-// CHECK-SAME:   ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) outs(%[[CV]] :
+// CHECK-SAME:   ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) inits(%[[CV]] :
 #map = affine_map<(d0)[s0]->(d0 + s0)>
@@ -615,7 +615,7 @@
   %C_view = memref.subview %result[%i0][8][1] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : memref to memref<8xf32, #map>
   %scalar = arith.addf %cf, %cf {__test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1} : f32
   linalg.generic #linalg_attrs ins(%a_buf_view, %b_buf_view, %scalar : memref<8xf32, #map>, memref<8xf32, #map>, f32)
-    outs(%C_view: memref<8xf32, #map>) {
+    inits(%C_view: memref<8xf32, #map>) {
   ^bb0(%a: f32, %b: f32, %s: f32, %c: f32):
     %add = arith.addf %a, %b : f32
     %accum = arith.addf %add, %c : f32
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -620,9 +620,9 @@
     // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
     %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32>
+    %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor<1xf32>) -> tensor<1xf32>
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32>
+    %8 = linalg.fill ins(%cst : f32) inits(%7 : tensor<1xf32>) -> tensor<1xf32>
     scf.forall.in_parallel {
       // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -647,14 +647,14 @@
   %0 = bufferization.alloc_tensor() : tensor<4xf32>
   // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32>
   %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) {
     // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
     %4 = tensor.extract %1[%arg4] : tensor<4xf32>
     vector.print %4 : f32
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %5 = linalg.fill ins(%cst2 : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32>
+    %5 = linalg.fill ins(%cst2 : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32>
     scf.yield %5 : tensor<4xf32>
   }
@@ -677,14 +677,14 @@
   %0 = bufferization.alloc_tensor() : tensor<4xf32>
   // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32>
   %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) {
     // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
     %4 = tensor.extract %1[%arg4] : tensor<4xf32>
     vector.print %4 : f32
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
-    %5 = linalg.fill ins(%cst2 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
+    %5 = linalg.fill ins(%cst2 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32>
     scf.yield %5 : tensor<4xf32>
   }
@@ -693,7 +693,7 @@
   %6 = tensor.extract %1[%arg4] : tensor<4xf32>
   vector.print %6 : f32
   // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-  %7 = linalg.fill ins(%cst3 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
+  %7 = linalg.fill ins(%cst3 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32>
   return %2, %7 : tensor<4xf32>, tensor<4xf32>
 }
@@ -768,7 +768,7 @@
     // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true"]}
     %2 = tensor.extract_slice %t[0][4][1] : tensor<10xf32> to tensor<4xf32>
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %filled = linalg.fill ins(%cst : f32) outs(%2 : tensor<4xf32>) -> tensor<4xf32>
+    %filled = linalg.fill ins(%cst : f32) inits(%2 : tensor<4xf32>) -> tensor<4xf32>
     %3 = tensor.extract %filled[%a] : tensor<4xf32>
     vector.print %3 : f32
   }
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -55,7 +55,7 @@
   // CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]]
   // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[clone]])
   %0 = scf.for %iv = %lb to %ub step %c1 iter_args(%1 = %A) -> tensor {
-    %r = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor
+    %r = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor
     scf.yield %B : tensor
   }
   %1 = tensor.extract %0[%c1] : tensor
@@ -547,8 +547,8 @@
   %2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor) {
       // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
       %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor
-      // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref) -> tensor
+      // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview]] : memref) -> tensor
       // Self-copy will DCE away later.
    // CHECK: memref.copy %[[subview]], %[[subview]]
@@ -594,8 +594,8 @@
   // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1]
   %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor
-  // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref) -> tensor
+  // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview1]] : memref) -> tensor
   // Now the copy of the actual insert_slice. (It will fold away.)
   // CHECK: memref.copy %[[subview1]], %[[subview1]]
@@ -637,8 +637,8 @@
   %6 = tensor.extract_slice %arg1[0, %4] [8, 4] [1, 1] : tensor<8x8xf32> to tensor<8x4xf32>
   %7 = tensor.extract_slice %o[%1, %4] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32>
-  // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) outs({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>)
-  %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) outs(%7 : tensor<4x4xf32>) -> tensor<4x4xf32>
+  // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) inits({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>)
+  %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) inits(%7 : tensor<4x4xf32>) -> tensor<4x4xf32>
   scf.forall.in_parallel {
     tensor.parallel_insert_slice %8 into %o[%1, %4] [4, 4] [1, 1] : tensor<4x4xf32> into tensor<8x8xf32>
   }
@@ -719,8 +719,8 @@
   %c0 = arith.constant 0 : index
   // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
   %alloc = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<5xf32>
-  // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref<5xf32, 1>)
-  %filled = linalg.fill ins(%cst : f32) outs(%alloc : tensor<5xf32>) -> tensor<5xf32>
+  // CHECK: linalg.fill {{.*}} inits(%[[alloc]] : memref<5xf32, 1>)
+  %filled = linalg.fill ins(%cst : f32) inits(%alloc : tensor<5xf32>) -> tensor<5xf32>
   // CHECK: scf.if %{{.*}} -> (memref<5xf32, 1>) {
   %1 = scf.if %c -> tensor<5xf32> {
     // CHECK: %[[cloned:.*]] = bufferization.clone %[[alloc]]
diff --git a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir
--- a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir
+++ b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir
@@ -51,7 +51,7 @@
 // CHECK: scf.yield %[[B]] : memref
 // CHECK: }
 // CHECK: %[[S:.*]] = memref.subview %[[M]]{{\[}}%[[S1]]] {{\[}}%[[D]]] [1]
-// CHECK: linalg.fill ins(%[[C]] : f64) outs(%[[S]]
+// CHECK: linalg.fill ins(%[[C]] : f64) inits(%[[S]]
 // CHECK: return %[[M]], %[[S2]] : memref, index
 func.func @sparse_push_back_n(%arg0: index, %arg1: memref, %arg2: f64, %arg3: index) -> (memref, index) {
   %0:2 = sparse_tensor.push_back %arg0, %arg1, %arg2, %arg3 : index, memref, f64, index
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir
--- a/mlir/test/Dialect/SparseTensor/codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen.mlir
@@ -377,8 +377,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion1() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR>
@@ -392,8 +392,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<4xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion2() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC>
@@ -408,8 +408,8 @@
 // CHECK: %[[V:.*]] = memref.alloc(%[[D0]]) : memref
 // CHECK: %[[B:.*]] = memref.alloc(%[[D0]]) : memref
 // CHECK: %[[D:.*]] = memref.alloc(%[[D0]]) : memref
-// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[V]] : memref)
-// CHECK: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref)
+// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[V]] : memref)
+// CHECK: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref {
   %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor
diff --git a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
--- a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
@@ -9,13 +9,13 @@
 // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_4:.*]] = memref.alloc() : memref<16xindex>
 // CHECK: %[[VAL_5:.*]] = memref.cast %[[VAL_4]] : memref<16xindex> to memref
-// CHECK: linalg.fill ins(%[[VAL_3]] : index) outs(%[[VAL_4]] : memref<16xindex>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : index) inits(%[[VAL_4]] : memref<16xindex>)
 // CHECK: %[[VAL_6:.*]] = memref.alloc() : memref<16xindex>
 // CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<16xindex> to memref
-// CHECK: linalg.fill ins(%[[VAL_3]] : index) outs(%[[VAL_6]] : memref<16xindex>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : index) inits(%[[VAL_6]] : memref<16xindex>)
 // CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<16xf64>
 // CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<16xf64> to memref
-// CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_8]] : memref<16xf64>)
+// CHECK: linalg.fill ins(%[[VAL_2]] : f64) inits(%[[VAL_8]] : memref<16xf64>)
 // CHECK: %[[VAL_10:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier
 // CHECK: %[[VAL_12:.*]] = sparse_tensor.storage_specifier.set %[[VAL_10]] lvl_sz at 0 with %[[VAL_0]] : !sparse_tensor.storage_specifier
 // CHECK: %[[VAL_14:.*]] = sparse_tensor.storage_specifier.get %[[VAL_12]] pos_mem_sz at 0 : !sparse_tensor.storage_specifier
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -332,8 +332,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion1() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR>
@@ -348,8 +348,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<4xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion2() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC>
@@ -365,8 +365,8 @@
 // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref
 // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref
 // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref)
 // CHECK: return %[[C]] : memref
 func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref {
   %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor
diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
@@ -41,7 +41,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32>
-// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref<13xi32>)
+// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) inits(%[[M]] : memref<13xi32>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -83,7 +83,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref
-// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref)
+// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) inits(%[[M]] : memref)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -130,7 +130,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64>
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x4xf64>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -148,7 +148,7 @@
 // CHECK-RWT-SAME: %[[A:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>) -> tensor<2x4xf64> {
 // CHECK-RWT: %[[F0:.*]] = arith.constant 0.000000e+00 : f64
 // CHECK-RWT: %[[B:.*]] = memref.alloc() : memref<2x4xf64>
-// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]]
+// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]]
 // CHECK-RWT: sparse_tensor.foreach in %[[A]]
 // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64):
 // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]]
@@ -190,7 +190,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -238,7 +238,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64>
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x?xf64>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -286,7 +286,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -308,7 +308,7 @@
 // CHECK-RWT: %[[D0:.*]] = tensor.dim %[[A]], %[[C0]]
 // CHECK-RWT: %[[D1:.*]] = tensor.dim %[[A]], %[[C1]]
 // CHECK-RWT: %[[B:.*]] = memref.alloc(%[[D0]], %[[D1]])
-// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]]
+// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]]
 // CHECK-RWT: sparse_tensor.foreach in %[[A]]
 // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64):
 // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]]
@@ -354,7 +354,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<3xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64>
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x3x4xf64>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir
--- a/mlir/test/Dialect/SparseTensor/dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/dense.mlir
@@ -59,7 +59,7 @@
   %c = arith.constant 1.0 : f32
   %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32, #DenseMatrix>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
   ^bb(%a: f32, %x: f32):
     %1 = arith.addf %a, %c : f32
     linalg.yield %1 : f32
@@ -99,7 +99,7 @@
   %c = arith.constant 1.0 : f32
   %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
+    inits(%argx: tensor<32x16xf32, #DenseMatrix>) {
   ^bb(%a: f32, %x: f32):
     %1 = arith.addf %a, %c : f32
     linalg.yield %1 : f32
@@ -145,7 +145,7 @@
   -> tensor<32x16xf32, #DenseMatrix> {
   %0 = linalg.generic #trait_3d
     ins(%arga: tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
+    inits(%argx: tensor<32x16xf32, #DenseMatrix>) {
   ^bb(%a: f32, %x: f32):
     %1 = arith.addf %x, %a : f32
     linalg.yield %1 : f32
diff --git a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
--- a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
+++ b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
@@ -59,12 +59,12 @@
   -> (tensor<10xf32>, tensor<10xf32>) {
   // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [false]} : tensor<10xf32>
-  // CHECK: linalg.generic {{.*}} outs(%[[alloc]]
+  // CHECK: linalg.generic {{.*}} inits(%[[alloc]]
   // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [true]} : tensor<10xf32>
-  // CHECK-FUNC: linalg.generic {{.*}} outs(%[[alloc]]
+  // CHECK-FUNC: linalg.generic {{.*}} inits(%[[alloc]]
   %0 = linalg.generic #trait
     ins(%arga: tensor<10xf32, #SV>)
-    outs(%argb: tensor<10xf32>) {
+    inits(%argb: tensor<10xf32>) {
   ^bb(%a: f32, %x : f32):
     %up = arith.addf %a, %x : f32
     linalg.yield %up : f32
diff --git a/mlir/test/Dialect/SparseTensor/one_trip.mlir b/mlir/test/Dialect/SparseTensor/one_trip.mlir
--- a/mlir/test/Dialect/SparseTensor/one_trip.mlir
+++ b/mlir/test/Dialect/SparseTensor/one_trip.mlir
@@ -25,7 +25,7 @@
 func.func @sparse_scale(%argx: tensor<1x1xf32, #Dense>) -> tensor<1x1xf32, #Dense> {
   %c = arith.constant 2.0 : f32
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor<1x1xf32, #Dense>) {
+    inits(%argx: tensor<1x1xf32, #Dense>) {
   ^bb(%x: f32):
     %1 = arith.mulf %x, %c : f32
     linalg.yield %1 : f32
diff --git a/mlir/test/Dialect/SparseTensor/rejected.mlir b/mlir/test/Dialect/SparseTensor/rejected.mlir
--- a/mlir/test/Dialect/SparseTensor/rejected.mlir
+++ b/mlir/test/Dialect/SparseTensor/rejected.mlir
@@ -27,7 +27,7 @@
   -> tensor {
   %0 = linalg.generic #trait
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
   ^bb(%a: i32, %x: i32):
     // NOTE: `subi %a, %x` is the reason why the program is rejected by the sparse compiler.
    // It is because we do not allow `-outTensor` in reduction loops as it creates cyclic
diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
--- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
+++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
@@ -81,7 +81,7 @@
 func.func @sparse_scale(%argx: tensor) -> tensor {
   %c = arith.constant 2.0 : f32
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%x: f32):
    %1 = arith.mulf %x, %c : f32
    linalg.yield %1 : f32
@@ -145,7 +145,7 @@
   %argx: tensor<32xf64>) -> tensor<32xf64> {
   %0 = linalg.generic #trait_matvec
     ins(%arga, %argb : tensor<32x64xf64, #SortedCOO>, tensor<64xf64>)
-    outs(%argx: tensor<32xf64>) {
+    inits(%argx: tensor<32xf64>) {
  ^bb(%A: f64, %b: f64, %x: f64):
    %0 = arith.mulf %A, %b : f64
    %1 = arith.addf %x, %0 : f64
@@ -171,7 +171,7 @@
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref>
 // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref
 // CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64>
-// CHECK: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_15]] : memref<32x64xf64>)
+// CHECK: linalg.fill ins(%[[VAL_4]] : f64) inits(%[[VAL_15]] : memref<32x64xf64>)
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref
@@ -266,7 +266,7 @@
   %argz: tensor<32x64xf64>) -> tensor<32x64xf64> {
   %0 = linalg.generic #trait_mul
     ins(%argx, %argy : tensor<32x64xf64, #SortedCOO>, tensor<32x64xf64, #SortedCOO>)
-    outs(%argz: tensor<32x64xf64>) {
+    inits(%argz: tensor<32x64xf64>) {
  ^bb(%x: f64, %y: f64, %z: f64):
    %1 = arith.mulf %x, %y : f64
    linalg.yield %1 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
@@ -22,7 +22,7 @@
 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref
 // CHECK: %[[VAL_11:.*]] = arith.addf %[[VAL_10]], %[[VAL_1]] : f32
@@ -34,7 +34,7 @@
 func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %argb : f32
    linalg.yield %0 : f32
@@ -52,7 +52,7 @@
 // CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32>
 // CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32>
-// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_7]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
 // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref
 // CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32
@@ -65,7 +65,7 @@
   %u = tensor.empty() : tensor<32xf32>
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%u: tensor<32xf32>) {
+    inits(%u: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %argb : f32
    linalg.yield %0 : f32
@@ -82,7 +82,7 @@
 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref
 // CHECK: %[[VAL_11:.*]] = arith.mulf %[[VAL_10]], %[[VAL_1]] : f32
@@ -94,7 +94,7 @@
 func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.mulf %a, %argb : f32
    linalg.yield %0 : f32
@@ -116,7 +116,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32xf32>)
 // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
 // CHECK: %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
 // CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
@@ -149,7 +149,7 @@
 func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %argb : f32
    linalg.yield %0 : f32
@@ -168,7 +168,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]]
 // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref
 // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] {
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref
@@ -186,7 +186,7 @@
 func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %a : f32 // same tensor
    %1 = arith.addf %a, %a : f32 // should yield
@@ -206,7 +206,7 @@
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
@@ -221,7 +221,7 @@
 func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.mulf %a, %argb : f32
    linalg.yield %0 : f32
@@ -249,7 +249,7 @@
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32>
@@ -262,7 +262,7 @@
 func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -280,7 +280,7 @@
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32>
@@ -293,7 +293,7 @@
 func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -314,7 +314,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
@@ -352,7 +352,7 @@
 func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -371,7 +371,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
@@ -387,7 +387,7 @@
 func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -408,7 +408,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
@@ -446,7 +446,7 @@
 func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -465,7 +465,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
@@ -481,7 +481,7 @@
 func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -502,7 +502,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref
@@ -564,7 +564,7 @@
 func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -585,7 +585,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref
@@ -625,7 +625,7 @@
 func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -647,7 +647,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>)
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref
@@ -716,7 +716,7 @@
   // Kernel "x(i) = a(i) * c + b(i) * c".
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
-    outs(%argx: tensor<16xf32>) {
+    inits(%argx: tensor<16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %argc : f32
    %1 = arith.mulf %b, %argc : f32
@@ -740,7 +740,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>)
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref
@@ -809,7 +809,7 @@
   // Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c".
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
-    outs(%argx: tensor<16xf32>) {
+    inits(%argx: tensor<16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    %1 = arith.mulf %0, %argc : f32
@@ -850,7 +850,7 @@
 func.func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduction
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %x, %a : f32
    linalg.yield %0 : f32
@@ -954,7 +954,7 @@
   // as two separate reductions kernels.
   %0 = linalg.generic #trait_sum_reduction2
     ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    %1 = arith.addf %x, %0 : f32
@@ -1067,7 +1067,7 @@
   // as two separate reductions kernels.
  %0 = linalg.generic #trait_sum_reduction_inv
    ins(%arga, %argb, %argc : tensor<16xf32, #SV>, tensor, tensor<16xf32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %b: f32, %c: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    %1 = arith.addf %0, %c : f32
@@ -1108,7 +1108,7 @@
 // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor> to memref
 // CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor
 // CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_18]] : memref)
 // CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref
 // CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref
 // CHECK-DAG: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref
@@ -1246,7 +1246,7 @@
   %argx: tensor) -> tensor {
   %r = linalg.generic #trait_four_tensors
     ins(%arga, %argb, %argc, %argd: tensor, tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f64, %b: f64, %c: f64, %d: f64, %x: f64):
    %0 = arith.addf %a, %b : f64
    %1 = arith.addf %c, %d : f64
@@ -1559,7 +1559,7 @@
   %argc: tensor, %argx: tensor) ->tensor{
   %0 = linalg.generic #trait_red3s
     ins(%arga, %argb, %argc: tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f64,%b: f64,%c: f64,%x: f64):
    %0 = arith.addf %x, %a : f64
    %1 = arith.addf %0, %b : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
@@ -27,7 +27,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
 // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index
@@ -44,7 +44,7 @@
 func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -63,7 +63,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
 // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index
@@ -80,7 +80,7 @@
 func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -102,7 +102,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] {
 // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref
 // CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_7]] : index
@@ -143,7 +143,7 @@
 func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -163,7 +163,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref
 // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index
@@ -182,7 +182,7 @@
 func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -204,7 +204,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>)
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref
 // CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_14]], %[[VAL_18:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
@@ -250,7 +250,7 @@
 func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -270,7 +270,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>)
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_5]] {
@@ -290,7 +290,7 @@
 func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -314,7 +314,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>)
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref
 // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
@@ -384,7 +384,7 @@
 func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -405,7 +405,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32x16xf32>)
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] {
@@ -427,7 +427,7 @@
 func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -452,7 +452,7 @@
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>)
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref
@@ -591,7 +591,7 @@
 func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -616,7 +616,7 @@
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>)
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref
@@ -687,7 +687,7 @@
 func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -711,7 +711,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>)
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref
 // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_5]]) : (index, index) -> (index, index) {
@@ -793,7 +793,7 @@
 func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -815,7 +815,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>)
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] {
@@ -839,7 +839,7 @@
 func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -890,7 +890,7 @@
 func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> {
   %0 = linalg.generic #trait_matvec
     ins(%argA, %argb: tensor<16x32xf32, #Tds>, tensor<32xf32>)
-    outs(%argx: tensor<16xf32>) {
+    inits(%argx: tensor<16xf32>) {
  ^bb(%A: f32, %b: f32, %x: f32):
    %0 = arith.mulf %A, %b : f32
    %1 = arith.addf %0, %x : f32
@@ -936,7 +936,7 @@
 func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduction
     ins(%arga: tensor<10x20xf32, #Tds>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %x, %a : f32
    linalg.yield %0 : f32
@@ -964,7 +964,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref
-// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref)
+// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_11]] : memref)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref
 // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_4]] : index
@@ -983,7 +983,7 @@
   %0 = arith.constant 2.0 : f64
   %1 = linalg.generic #trait_scale
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f64, %x: f64):
    %2 = arith.mulf %a, %0 : f64
    linalg.yield %2 : f64
@@ -1048,7 +1048,7 @@
   %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%s: f32, %a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    %1 = arith.mulf %s, %0 : f32
@@ -1272,7 +1272,7 @@
               tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %b: f32, %c: f32, %d: f32, %e: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    %1 = arith.mulf %0, %d : f32
diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
@@ -35,7 +35,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
 // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index
@@ -56,7 +56,7 @@
 func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -77,7 +77,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
 // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index
@@ -98,7 +98,7 @@
 func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -122,7 +122,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] {
 // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_9]] {
 // CHECK: %[[VAL_18:.*]] = arith.muli %[[VAL_16]], %[[VAL_5]] : index
@@ -167,7 +167,7 @@
 func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -189,7 +189,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] {
 // CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_14]], %[[VAL_5]] : index
@@ -212,7 +212,7 @@
 func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -236,7 +236,7 @@
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] {
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref
 // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_8]] : index
@@ -285,7 +285,7 @@
 func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -307,7 +307,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref
 // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_6]] : index
@@ -330,7
@@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -356,7 +356,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_9]] : index @@ -429,7 +429,7 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -452,7 +452,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_6]] : index @@ -477,7 +477,7 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -501,7 +501,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_7]]) : (index, 
index) -> (index, index) { @@ -555,7 +555,7 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -577,7 +577,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_6]] { @@ -601,7 +601,7 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -627,7 +627,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -705,7 +705,7 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -728,7 +728,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref 
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { @@ -754,7 +754,7 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -780,7 +780,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_16]] : memref<32x16x8xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { @@ -862,7 +862,7 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -885,7 +885,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -911,7 +911,7 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -939,7 +939,7 @@ // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_19]] : memref<32x16x8xf32>) // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: 
%[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_20]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -1045,7 +1045,7 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -1069,7 +1069,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] { @@ -1097,7 +1097,7 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -1163,7 +1163,7 @@ %argd: tensor) -> tensor { %0 = linalg.generic #trait_kernel_3d ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f32, %c: f32, %d: f32, %a: f32): %0 = arith.mulf %b, %c : f32 %1 = arith.mulf %0, %d : f32 @@ -1219,7 +1219,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor<10x20x30xf32, #Tsss>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -1273,7 +1273,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -1308,7 +1308,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<10x20x30xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<10x20x30xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { @@ -1330,7 +1330,7 @@ %argx: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { %0 = linalg.generic #trait_invariants ins(%arga, %argb, %argc : tensor<10xf32, #Td>, tensor<20xf32>, tensor<30xf32>) - 
outs(%argx: tensor<10x20x30xf32>) { + inits(%argx: tensor<10x20x30xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %0, %c : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -47,7 +47,7 @@ %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -98,7 +98,7 @@ %argx = bufferization.alloc_tensor() : tensor<32xf32, #SpVec> %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32, #SpVec>) - outs(%argx: tensor<32xf32, #SpVec>) { + inits(%argx: tensor<32xf32, #SpVec>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -135,7 +135,7 @@ %argx = bufferization.alloc_tensor() : tensor<32xf32, #EncDenseVec> %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #EncDenseVec>, tensor<4xf32, #EncDenseVec>) - outs(%argx: tensor<32xf32, #EncDenseVec>) { + inits(%argx: tensor<32xf32, #EncDenseVec>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -167,7 +167,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32> -// CHECK: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>) +// CHECK: linalg.fill ins(%[[ZERO]] : i32) inits(%[[VAL_11]] : memref<32xi32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_4]] { @@ -186,7 +186,7 @@ %argx: tensor<32xi32>) -> tensor<32xi32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32>) - outs(%argx: tensor<32xi32>) { + inits(%argx: tensor<32xi32>) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.andi %a, %b : i32 linalg.yield %0 : i32 @@ -237,7 +237,7 @@ %argx = bufferization.alloc_tensor() : tensor<32xi32, #SpVec> %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32, #SpVec>) - outs(%argx: tensor<32xi32, #SpVec>) { + inits(%argx: tensor<32xi32, #SpVec>) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.andi %a, %b : i32 linalg.yield %0 : i32 @@ -293,7 +293,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -363,7 +363,7 @@ %argx = bufferization.alloc_tensor() : tensor<32x16xf64, #CSR> %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64, #CSR>) - outs(%argx: tensor<32x16xf64, #CSR>) { + inits(%argx: tensor<32x16xf64, #CSR>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -427,7 +427,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait4 ins(%arga, %argb: tensor<34x16xf64, 
#CSR>, tensor<32x19xf64, #Row>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -488,7 +488,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait5 ins(%arga, %argb: tensor<34x16xf64, #CSR>, tensor<32x19xf64, #Row>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir @@ -46,7 +46,7 @@ func.func public @main(%arg0: tensor<4x5xi32, #DCSR>) -> tensor<4x3x5xi32, #SparseTensor> { %0 = bufferization.alloc_tensor() : tensor<4x3x5xi32, #SparseTensor> %1 = linalg.generic #trait - ins(%arg0 : tensor<4x5xi32, #DCSR>) outs(%0 : tensor<4x3x5xi32, #SparseTensor>) { + ins(%arg0 : tensor<4x5xi32, #DCSR>) inits(%0 : tensor<4x3x5xi32, #SparseTensor>) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 } -> tensor<4x3x5xi32, #SparseTensor> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -26,7 +26,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<5x4xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64> @@ -267,7 +267,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<4x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> @@ -330,7 +330,7 @@ // CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64> // CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<3x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir +++ 
b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir @@ -189,7 +189,7 @@ // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[TMP_d0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A:.*]] = memref.alloc(%[[TMP_c9]], %[[TMP_c4]]) : memref -// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) outs(%[[A]] : memref) +// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) inits(%[[A]] : memref) // CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -54,8 +54,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: } @@ -72,7 +72,7 @@ %v = bufferization.alloc_tensor(%n) : tensor %0 = linalg.generic #rowsum ins(%arga: tensor) - outs(%v: tensor) { + inits(%v: tensor) { ^bb(%a: f64, %x: f64): %1 = arith.addf %x, %a : f64 linalg.yield %1 : f64 @@ -105,8 +105,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C4]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -124,7 +124,7 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>) - outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> + inits(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> return %D: tensor<8x4xf64, #CSR> } @@ -153,8 +153,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C8]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -172,6 +172,6 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>) - outs(%C: 
tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> + inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> return %D: tensor<8x4xf64, #CSC> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -39,8 +39,8 @@ // CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<300xi1> to memref // CHECK: %[[VAL_24:.*]] = memref.alloc() : memref<300xindex> // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref -// CHECK: linalg.fill ins(%[[F0]] : f64) outs(%[[VAL_20]] : memref<300xf64>) -// CHECK: linalg.fill ins(%[[False]] : i1) outs(%[[VAL_22]] : memref<300xi1>) +// CHECK: linalg.fill ins(%[[F0]] : f64) inits(%[[VAL_20]] : memref<300xf64>) +// CHECK: linalg.fill ins(%[[False]] : i1) inits(%[[VAL_22]] : memref<300xi1>) // CHECK: %[[VAL_26:.*]] = call @sparsePositions0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_27:.*]] = call @sparseCoordinates0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_28:.*]] = call @sparsePositions0(%[[Arg0]], %[[I1]]) : (!llvm.ptr, index) -> memref @@ -124,8 +124,8 @@ %0 = bufferization.alloc_tensor() : tensor<100x300xf64, #DCSR> %cst = arith.constant 0.000000e+00 : f64 %1 = linalg.fill ins(%cst : f64) - outs(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<100x200xf64, #DCSR>, tensor<200x300xf64, #DCSR>) - outs(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> return %2 : tensor<100x300xf64, #DCSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -54,7 +54,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 linalg.yield %0 : f64 @@ -86,7 +86,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.ceil %a : f64 linalg.yield %0 : f64 @@ -118,7 +118,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.floor %a : f64 linalg.yield %0 : f64 @@ -150,7 +150,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.negf %a : f64 linalg.yield %0 : f64 @@ -210,7 +210,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.addf %a, %b : f64 linalg.yield %0 : f64 @@ -272,7 +272,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + 
inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.subf %a, %b : f64 linalg.yield %0 : f64 @@ -308,7 +308,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -342,7 +342,7 @@ %c = arith.constant 2.0 : f64 %0 = linalg.generic #traitc ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.divf %a, %c : f64 linalg.yield %0 : f64 @@ -382,7 +382,7 @@ %xinp = bufferization.alloc_tensor() : tensor<32xf64, #SV> %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%xinp: tensor<32xf64, #SV>) { + inits(%xinp: tensor<32xf64, #SV>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 %1 = math.ceil %0 : f64 @@ -423,7 +423,7 @@ %init = bufferization.alloc_tensor() : tensor<32xcomplex, #SV> %0 = linalg.generic #traitc ins(%arg0: tensor<32xcomplex, #SV>) - outs(%init: tensor<32xcomplex, #SV>) { + inits(%init: tensor<32xcomplex, #SV>) { ^bb(%a: complex, %x: complex): %0 = complex.div %a, %c : complex linalg.yield %0 : complex diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir @@ -55,7 +55,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -112,7 +112,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir @@ -74,7 +74,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.addi %a, %b : i64 linalg.yield %0 : i64 @@ -137,7 +137,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.subi %a, %b : i64 linalg.yield %0 : i64 @@ -173,7 +173,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.muli %a, %b : i64 linalg.yield %0 : i64 @@ -207,7 +207,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divsi %a, %c : i64 linalg.yield %0 : i64 @@ -241,7 +241,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divui %a, %c 
: i64 linalg.yield %0 : i64 @@ -277,7 +277,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.andi %a, %b : i64 linalg.yield %0 : i64 @@ -337,7 +337,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.ori %a, %b : i64 linalg.yield %0 : i64 @@ -397,7 +397,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.xori %a, %b : i64 linalg.yield %0 : i64 @@ -431,7 +431,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrsi %a, %c : i64 linalg.yield %0 : i64 @@ -465,7 +465,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrui %a, %c : i64 linalg.yield %0 : i64 @@ -499,7 +499,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shli %a, %c : i64 linalg.yield %0 : i64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir @@ -47,7 +47,7 @@ %c: tensor<10x30xf32>) -> tensor<10x30xf32> { %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32, #DCSR>, tensor<20x30xf32>) - outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32> + inits(%c: tensor<10x30xf32>) -> tensor<10x30xf32> return %0 : tensor<10x30xf32> } @@ -93,7 +93,7 @@ %c: tensor<10x30xf32>) -> tensor<10x30xf32> { %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32>, tensor<20x30xf32,#DCSR>) - outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32> + inits(%c: tensor<10x30xf32>) -> tensor<10x30xf32> return %0 : tensor<10x30xf32> } @@ -191,7 +191,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> + inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> return %D: tensor<4x4xf64, #DCSR> } @@ -292,7 +292,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi64>) -> tensor<5x6xi64> + inits(%output : tensor<5x6xi64>) -> tensor<5x6xi64> return %0: tensor<5x6xi64> } @@ -354,6 +354,6 @@ %x: tensor) -> tensor { %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>) - outs(%x: tensor) -> tensor + inits(%x: tensor) -> tensor return %dot : tensor } diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { 
%0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSC>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir @@ -84,8 +84,8 @@ // CHECK: %[[VAL_31:.*]] = m // CHECK: %[[VAL_32:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[VAL_33:.*]] = memref.cast %[[VAL_32]] : memref<4xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_9]] : f64) outs(%[[VAL_30]] : memref<4xf64>) -// CHECK: linalg.fill ins(%[[VAL_12]] : i1) outs(%[[VAL_31]] : memref<4xi1>) +// CHECK: linalg.fill ins(%[[VAL_9]] : f64) inits(%[[VAL_30]] : memref<4xf64>) +// CHECK: linalg.fill ins(%[[VAL_12]] : i1) inits(%[[VAL_31]] : memref<4xi1>) // CHECK: %[[VAL_34:.*]]:4 = scf.for %[[VAL_35:.*]] = %[[VAL_10]] to %[[VAL_8]] step %[[VAL_11]] iter_args(%[[VAL_36:.*]] = %[[VAL_27]], %[[VAL_37:.*]] = %[[VAL_17]], %[[VAL_38:.*]] = %[[VAL_19]], %[[VAL_39:.*]] = %[[VAL_29]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_35]]] : memref // CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_35]], %[[VAL_11]] : index @@ -148,6 +148,6 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>) - outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> + inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> return %D: tensor<4x4xf64, #CSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir @@ -40,7 +40,7 @@ // CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", 
"compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { // CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_12]] { // CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_9]] : index @@ -89,7 +89,7 @@ %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<10x20x30x40x50x60x70x80xf32>, tensor<80x70x60x50x40x30x20x10xf32, #SparseTensor>) - outs(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { + inits(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir @@ -48,7 +48,7 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 @@ -81,7 +81,7 @@ // CHECK: } func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.addf %x, %x : f32 linalg.yield %1 : f32 @@ -129,7 +129,7 @@ %xm = bufferization.alloc_tensor() : tensor<10x20xf32, #DCSR> %0 = linalg.generic #trait_scale ins(%arga: tensor<10x20xf32, #CSR>) - outs(%xm: tensor<10x20xf32, #DCSR>) { + inits(%xm: tensor<10x20xf32, #DCSR>) { ^bb(%a: f32, %x: f32): %1 = arith.mulf %a, %s : f32 linalg.yield %1 : f32 @@ -292,7 +292,7 @@ %0 = linalg.generic #trait_sumred ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 @@ -407,7 +407,7 @@ %0 = linalg.generic #trait_matmat ins(%arga, %argb: tensor, tensor) - outs(%cinit: tensor) { + inits(%cinit: tensor) { ^bb(%a: f32, %b: f32, %c: f32): %1 = arith.mulf %a, %b : f32 %2 = arith.addf %c, %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir @@ -20,7 +20,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -36,7 +36,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = 
linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -54,7 +54,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_4]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_2]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_2]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { @@ -70,7 +70,7 @@ %m = bufferization.alloc_tensor() : tensor<10xf32> %0 = linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%m: tensor<10xf32>) { + inits(%m: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -103,7 +103,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f32, %x : f32): %up = arith.addf %a, %x : f32 linalg.yield %up : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -61,7 +61,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_dd ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -109,7 +109,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_ss ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -159,7 +159,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -53,7 +53,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir @@ -25,7 +25,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) 
outs(%[[VAL_9]] : memref<20x30x10xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_9]] : memref<20x30x10xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index @@ -45,7 +45,7 @@ %argx: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10x20x30xf32, #X>) - outs(%argx: tensor<20x30x10xf32>) { + inits(%argx: tensor<20x30x10xf32>) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor<20x30x10xf32> @@ -64,7 +64,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_10]] : memref) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_7]], %[[VAL_11]] : index @@ -84,7 +84,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -83,7 +83,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir @@ -78,7 +78,7 @@ func.func @sparse_reshape_fused(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor { %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D> %0 = tensor.empty() : tensor<5x6xf32> - %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32> + %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32> %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32> %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor return %ret1 : tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir @@ -68,7 +68,7 @@ %c = arith.constant 2.2 : f32 %0 = linalg.generic #trait ins(%arga, %argp, %argq: tensor<32x16xf32, #SparseMatrix>, tensor, f32) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %p: f32, %q: f32, %x: f32): %0 = arith.mulf %a, %p : f32 // scalar tensor argument %1 = arith.mulf %0, %q : f32 // scalar argument diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir 
b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir @@ -32,7 +32,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst : f64) - outs(%0 : tensor<1024x1024xf64>) { + inits(%0 : tensor<1024x1024xf64>) { ^bb0(%a: f64, %x: f64): linalg.yield %a : f64 } -> tensor<1024x1024xf64> @@ -49,7 +49,7 @@ %0 = bufferization.alloc_tensor() : tensor<32xf64> %1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%0 : tensor<32xf64>) { + inits(%0 : tensor<32xf64>) { ^bb0(%x: f64): linalg.yield %cst : f64 } -> tensor<32xf64> @@ -105,7 +105,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -114,7 +114,7 @@ // Sample the result with elements-wise multiplication with sparse matrix. %3 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 @@ -187,7 +187,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -197,7 +197,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir @@ -83,7 +83,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -93,7 +93,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir @@ -40,7 +40,7 @@ func.func @mul64(%arga: tensor<32xf64, #SparseVector64>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<32xf64, #SparseVector64>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -69,7 +69,7 @@ func.func @mul32(%arga: tensor<32xf64, #SparseVector32>, %argb: 
%0 = linalg.generic #trait_mul
ins(%arga, %argb: tensor<32xf64, #SparseVector32>, tensor<32xf64>)
- outs(%argx: tensor<32xf64>) {
+ inits(%argx: tensor<32xf64>) {
^bb(%a: f64, %b: f64, %x: f64):
%0 = arith.mulf %a, %b : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
@@ -50,7 +50,7 @@
%i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
%0 = linalg.generic #transpose_trait
ins(%arga: tensor<3x4xf64, #DCSR>)
- outs(%i: tensor<4x3xf64, #DCSR>) {
+ inits(%i: tensor<4x3xf64, #DCSR>) {
^bb(%a: f64, %x: f64):
linalg.yield %a : f64
} -> tensor<4x3xf64, #DCSR>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
@@ -75,7 +75,7 @@
func.func @scale_d(%arga: tensor<1024xf32, #DenseVector>, %b: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
%0 = linalg.generic #trait_scale_d
ins(%arga: tensor<1024xf32, #DenseVector>)
- outs(%argx: tensor<1024xf32>) {
+ inits(%argx: tensor<1024xf32>) {
^bb(%a: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
@@ -199,7 +199,7 @@
%argx: tensor<1024xf32>) -> tensor<1024xf32> {
%0 = linalg.generic #trait_mul_s
ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
- outs(%argx: tensor<1024xf32>) {
+ inits(%argx: tensor<1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
@@ -297,7 +297,7 @@
%argx: tensor) -> tensor {
%0 = linalg.generic #trait_reduction_d
ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32>)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
%1 = arith.addf %x, %0 : f32
@@ -438,7 +438,7 @@
%argx: tensor<512x1024xf32>) -> tensor<512x1024xf32> {
%0 = linalg.generic #trait_mul_ds
ins(%arga, %argb: tensor<512x1024xf32, #SparseMatrix>, tensor<512x1024xf32>)
- outs(%argx: tensor<512x1024xf32>) {
+ inits(%argx: tensor<512x1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
@@ -552,7 +552,7 @@
%argx: tensor<33x64xf64>) -> tensor<33x64xf64> {
%0 = linalg.generic #trait_affine
ins(%arga: tensor<32x64xf64, #SparseMatrix>)
- outs(%argx: tensor<33x64xf64>) {
+ inits(%argx: tensor<33x64xf64>) {
^bb(%a: f64, %x: f64):
%0 = arith.addf %x, %a : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
@@ -112,7 +112,7 @@
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<64x32xf64, #SparseMatrix>, tensor<64x32xf64, #SparseMatrix>)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%m = arith.addf %a, %b : f64
%t = arith.addf %x, %m : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
@@ -29,7 +29,7 @@
// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
-// CHECK: linalg.fill ins(%[[VAL_4]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
+// CHECK: linalg.fill ins(%[[VAL_4]] : i64) inits(%[[VAL_11]] : memref<8xi64>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_1]] {
@@ -48,7 +48,7 @@
%init = tensor.empty() : tensor<8xi64>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64>) {
+ inits(%init: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -71,7 +71,7 @@
// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
-// CHECK: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : i64) inits(%[[VAL_11]] : memref<8xi64>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
@@ -113,7 +113,7 @@
%init = tensor.empty() : tensor<8xi64>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64>) {
+ inits(%init: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir
@@ -21,7 +21,7 @@
%argx: tensor) -> tensor {
%x = linalg.generic #matvec
ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
%1 = arith.addf %x, %0 : f32
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
@@ -56,7 +56,7 @@
%t = arith.constant 1 : i32
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32, #DenseVector>)
- outs(%init: tensor<1024xf32>) {
+ inits(%init: tensor<1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = math.absf %a : f32
%1 = math.ceil %0 : f32
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
@@ -54,7 +54,7 @@
func.func @mul_s(%arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
%0 = linalg.generic #trait_mul_s
ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
- outs(%argx: tensor<1024xf32>) {
+ inits(%argx: tensor<1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
diff --git a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
--- a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
+++ b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
@@ -101,7 +101,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f64, %x: f64):
%t = arith.mulf %x, %a: f64
linalg.yield %t : f64
@@ -191,7 +191,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f64, %x: f64):
%t = arith.mulf %x, %a: f64
linalg.yield %t : f64
@@ -272,7 +272,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i13, %x: i13):
%t = arith.ori %x, %a: i13
linalg.yield %t : i13
@@ -350,7 +350,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i13, %x: i13):
%t = arith.ori %a, %x: i13
linalg.yield %t : i13
@@ -425,7 +425,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.subi %x, %a: i32
linalg.yield %t : i32
@@ -503,7 +503,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.xori %x, %a: i32
linalg.yield %t : i32
@@ -577,7 +577,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.andi %x, %a: i32
linalg.yield %t : i32
@@ -652,7 +652,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.muli %x, %a: i32
linalg.yield %t : i32
@@ -726,7 +726,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.addi %x, %a: i32
linalg.yield %t : i32
@@ -800,7 +800,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %x: f32):
%t = arith.subf %x, %a: f32
linalg.yield %t : f32
@@ -874,7 +874,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %x: f32):
%t = arith.addf %x, %a: f32
linalg.yield %t : f32
diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir
--- a/mlir/test/Dialect/Tensor/bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/bufferize.mlir
@@ -200,7 +200,7 @@
// CHECK-DAG: %[[ALLOC:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref
// CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]]
// CHECK: %[[MAPPED:.*]] = linalg.map
-// CHECK: outs(%[[ALLOC_T]] : tensor)
+// CHECK: inits(%[[ALLOC_T]] : tensor)
// CHECK: %[[INDEX:.*]] = linalg.index 0 : index
// CHECK: %[[ELEM:.*]] = memref.dim %[[ARG_M]], %[[INDEX]] : memref<*xf32>
// CHECK: linalg.yield %[[ELEM]]
@@ -226,7 +226,7 @@
// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref<16x?xindex>
// CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]]
// CHECK: %[[MAPPED:.*]] = linalg.map
-// CHECK: outs(%[[ALLOC_T]] : tensor<16x?xindex>)
+// CHECK: inits(%[[ALLOC_T]] : tensor<16x?xindex>)
// CHECK: %[[INDEX0:.*]] = linalg.index 0
// CHECK: %[[INDEX1:.*]] = linalg.index 1
// CHECK: %[[ADD:.*]] = arith.addi %[[INDEX0]], %[[INDEX1]]
@@ -563,7 +563,7 @@
// CHECK: %[[alloc:.*]] = memref.alloc(%[[size0]], %[[size1]]) {{.*}} : memref
// CHECK: %[[alloc_t:.*]] = bufferization.to_tensor %[[alloc]]
// CHECK: %[[mapped:.*]] = linalg.map
- // CHECK: outs(%[[alloc_t]] : tensor)
+ // CHECK: inits(%[[alloc_t]] : tensor)
// CHECK: %[[index0:.*]] = linalg.index 0
// CHECK: %[[index1:.*]] = linalg.index 1
// CHECK: %[[mul:.*]] = arith.muli %[[index0]], %[[index1]]
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -70,8 +70,8 @@
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor
/// Overwrite A inplace.
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
- %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor) -> tensor
+ // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]]
+ %r1 = linalg.fill ins(%f0 : f32) inits(%r0 : tensor) -> tensor
// CHECK: return
// CHECK-NOT: tensor
@@ -90,8 +90,8 @@
{
%f0 = arith.constant 0.0 : f32
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
- %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]]
+ %r0 = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
// CHECK-NOT: alloc
// CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
@@ -255,7 +255,7 @@
// CHECK: memref.alloc
%cst = arith.constant 4.200000e+01 : f32
// CHECK: linalg.fill
- %0 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+ %0 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32>
// CHECK: memref.copy
%1 = tensor.insert_slice %0 into %t[0][10][1] : tensor<10xf32> into tensor<10xf32>
return %1 : tensor<10xf32>
@@ -273,7 +273,7 @@
{memory_space = 3 : i64} : tensor
// CHECK: %[[padded_alloc:.*]] = memref.alloc() {{.*}} : memref<15xf32, 3>
// CHECK: linalg.map
- // CHECK: outs(%[[padded_alloc]] : memref<15xf32, 3>)
+ // CHECK: inits(%[[padded_alloc]] : memref<15xf32, 3>)
// CHECK: linalg.yield %{{.*}}
// CHECK: }
// CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<15xf32, 3> to memref, 3>
@@ -297,8 +297,8 @@
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32>
- // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref<10xf32>)
- %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+ // CHECK: linalg.fill {{.*}} inits(%[[alloc]] : memref<10xf32>)
+ %1 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32>
// Read %1 so that it does not DCE away.
%vec = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<10xf32>
@@ -318,8 +318,8 @@
func.func @insert_slice_full_overwrite(%t: tensor<10xf32>, %b: tensor<10xf32>) -> tensor<10xf32> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
- // CHECK: linalg.fill {{.*}} outs(%[[t]] : memref<10xf32,{{.*}}>)
- %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+ // CHECK: linalg.fill {{.*}} inits(%[[t]] : memref<10xf32,{{.*}}>)
+ %1 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32>
// Read %1 so that it does not DCE away.
%vec = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<10xf32>
diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir
--- a/mlir/test/Dialect/Transform/selective-targeting.mlir
+++ b/mlir/test/Dialect/Transform/selective-targeting.mlir
@@ -12,7 +12,7 @@
// CHECK-SAME: -> tensor<4x4xf32>
%0 = linalg.matmul { test.attrA }
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -30,7 +30,7 @@
// CHECK: vector.transfer_write
%0 = linalg.matmul { test.attrA, test.attrC }
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -47,7 +47,7 @@
// CHECK: vector.transfer_write
%0 = linalg.matmul { test.attrC }
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -94,7 +94,7 @@
// CHECK: vector.contract
%0 = linalg.matmul {test.attrA}
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -105,7 +105,7 @@
-> tensor<128x128xf32> {
// CHECK: linalg.matmul
%0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -139,11 +139,11 @@
// CHECK: vector.contract
%0 = linalg.matmul {test.attrA}
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
// CHECK: vector.contract
%1 = linalg.matmul ins(%arg0, %0: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg3: tensor<128x128xf32>)
+ inits(%arg3: tensor<128x128xf32>)
-> tensor<128x128xf32>
return %1 : tensor<128x128xf32>
}
diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir
--- a/mlir/test/Dialect/Vector/transform-vector.mlir
+++ b/mlir/test/Dialect/Vector/transform-vector.mlir
@@ -8,7 +8,7 @@
// CHECK: vector.extract {{.*}} : vector<8x4xf32>
// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32>
%0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>)
- outs(%arg2: tensor<8x32xf32>)
+ inits(%arg2: tensor<8x32xf32>)
-> tensor<8x32xf32>
return %0 : tensor<8x32xf32>
}
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -72,7 +72,7 @@
// LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index
// LINALG: } else {
// slow path, fill tmp alloc and yield a memref_casted version of it
- // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
+ // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>)
// LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref
// LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]])
// LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
@@ -165,7 +165,7 @@
// LINALG-SAME: memref>, index, index
// LINALG: } else {
// slow path, fill tmp alloc and yield a memref_casted version of it
- // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
+ // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>)
// LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]])
// LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
// LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
diff --git a/mlir/test/IR/slice.mlir b/mlir/test/IR/slice.mlir
--- a/mlir/test/IR/slice.mlir
+++ b/mlir/test/IR/slice.mlir
@@ -6,9 +6,9 @@
%c = memref.alloc(%arg0, %arg1) : memref
%d = memref.alloc(%arg0, %arg1) : memref
linalg.matmul ins(%a, %b : memref, memref)
- outs(%c : memref)
+ inits(%c : memref)
linalg.matmul ins(%a, %b : memref, memref)
- outs(%d : memref)
+ inits(%d : memref)
memref.dealloc %c : memref
memref.dealloc %b : memref
memref.dealloc %a : memref
diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
@@ -43,7 +43,7 @@
iterator_types = ["parallel", "parallel"]
}
ins(%lhs, %rhs : memref, memref)
- outs(%sum : memref)
+ inits(%sum : memref)
{
^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
%0 = arith.addf %lhs_in, %rhs_in : f32
@@ -68,8 +68,8 @@
%RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
%DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
- linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>)
- linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>)
%LHS = memref.cast %LHS10 : memref<1x10xf32> to memref
%RHS = memref.cast %RHS10 : memref<1x10xf32> to memref
diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
@@ -90,8 +90,8 @@
%RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
%DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
- linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>)
- linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>)
%LHS = memref.cast %LHS10 : memref<1x10xf32> to memref
%RHS = memref.cast %RHS10 : memref<1x10xf32> to memref
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
@@ -12,9 +12,9 @@
%x = memref.dim %A, %c0 : memref
%y = memref.dim %B, %c1 : memref
%C = memref.alloc(%x, %y) : memref
- linalg.fill ins(%f0 : f32) outs(%C : memref)
+ linalg.fill ins(%f0 : f32) inits(%C : memref)
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return %C : memref
}
@@ -26,12 +26,12 @@
%x = memref.dim %A, %c1 : memref
%n = memref.dim %B, %c1 : memref
%C = memref.alloc(%m, %n) : memref
- linalg.fill ins(%f0 : f32) outs(%C : memref)
+ linalg.fill ins(%f0 : f32) inits(%C : memref)
scf.for %i = %c0 to %n step %c1 {
%b = memref.subview %B[0, %i][%x, 1][1, 1] : memref to memref>
%c = memref.subview %C[0, %i][%m, 1][1, 1] : memref to memref>
linalg.matvec ins(%A, %b: memref, memref>)
- outs(%c: memref>)
+ inits(%c: memref>)
}
return %C : memref
}
@@ -46,8 +46,8 @@
%val2 = arith.constant 17.0 : f32
%A = memref.alloc(%m, %x) : memref
%B = memref.alloc(%x, %n) : memref
- linalg.fill ins(%val1 : f32) outs(%A : memref)
- linalg.fill ins(%val2 : f32) outs(%B : memref)
+ linalg.fill ins(%val1 : f32) inits(%A : memref)
+ linalg.fill ins(%val2 : f32) inits(%B : memref)
memref.store %val1, %B[%c0, %c0] : memref
%C1 = call @matmul(%A, %B) : (memref, memref) -> memref
%C2 = call @matvec(%A, %B) : (memref, memref) -> memref
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns a 1-D buffer of size %s1 filled with the value %f
func.func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f
func.func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3, %s4) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
@@ -14,7 +14,7 @@
%cst = arith.constant 0.000000e+00 : f32
%c2 = arith.constant 2 : index
%c0 = arith.constant 0 : index
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+ %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
%1 = affine.apply #map0(%c0, %c64)[%c2]
%2 = bufferization.alloc_tensor(%1) : tensor
%3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) {
@@ -61,7 +61,7 @@
%13 = tensor.extract_slice %6[%12, 0] [1, 2] [1, 1] : tensor to tensor<2xf32>
%14 = affine.apply #map1(%arg3, %c0)[%c2]
%15 = tensor.extract_slice %3[%14, 0] [1, 2] [1, 1] : tensor to tensor<2xf32>
- %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) outs(%arg4 : tensor) -> tensor
+ %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) inits(%arg4 : tensor) -> tensor
// %AA = tensor.cast %13 : tensor<2xf32> to tensor<*xf32>
// call @printMemrefF32(%AA) : (tensor<*xf32>) -> ()
@@ -83,9 +83,9 @@
%A = bufferization.alloc_tensor() : tensor<64xf32>
%B = bufferization.alloc_tensor() : tensor<64xf32>
%C = bufferization.alloc_tensor() : tensor
- %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
- %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
- %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor
+ %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32>
+ %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32>
+ %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor
%res = call @init_and_dot(%AA, %BB, %CC) : (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -23,7 +23,7 @@
%C = arith.constant dense<1000.0> : tensor<2x4xf32>
%D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>)
- outs(%C: tensor<2x4xf32>) -> tensor<2x4xf32>
+ inits(%C: tensor<2x4xf32>) -> tensor<2x4xf32>
%unranked = tensor.cast %D : tensor<2x4xf32> to tensor<*xf32>
call @printMemrefF32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
@@ -73,7 +73,7 @@
%init = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_assign
ins(%arga: tensor)
- outs(%init: tensor) {
+ inits(%init: tensor) {
^bb(%a: f64, %x: f64):
%0 = arith.mulf %a, %c2 : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir
@@ -39,7 +39,7 @@
%xv = bufferization.alloc_tensor (%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: bf16, %b: bf16, %x: bf16):
%1 = sparse_tensor.binary %a, %b : bf16, bf16 to bf16
overlap={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir
@@ -48,7 +48,7 @@
%xv = bufferization.alloc_tensor (%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f16, %b: f16, %x: f16):
%1 = sparse_tensor.binary %a, %b : f16, f16 to f16
overlap={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
@@ -27,8 +27,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
@@ -38,8 +38,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
@@ -49,8 +49,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32> into tensor<6x6xf32>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
@@ -61,8 +61,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0, 1], [2]] : tensor<2x3x6xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir
@@ -45,7 +45,7 @@
%xin = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_op
ins(%arg0: tensor)
- outs(%xin: tensor) {
+ inits(%xin: tensor) {
^bb0(%a: f64, %x: f64) :
%result = math.absf %a : f64
linalg.yield %result : f64
@@ -60,7 +60,7 @@
%xin = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_op
ins(%arg0: tensor)
- outs(%xin: tensor) {
+ inits(%xin: tensor) {
^bb0(%a: i32, %x: i32) :
%result = math.absi %a : i32
linalg.yield %result : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
@@ -72,7 +72,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: i32, %b: i32, %x: i32):
%1 = sparse_tensor.binary %a, %b : i32, i32 to i32
overlap={
@@ -96,7 +96,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -120,7 +120,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={}
@@ -138,7 +138,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_scale
ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %x: i32):
%idx = linalg.index 0 : index
%1 = sparse_tensor.binary %a, %idx : f64, index to i32
@@ -165,7 +165,7 @@
%xv = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_mat_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = sparse_tensor.binary %a, %b: f64, f64 to f64
overlap={
@@ -191,7 +191,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -213,7 +213,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -241,7 +241,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%row = linalg.index 0 : index
%col = linalg.index 1 : index
@@ -278,7 +278,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -323,7 +323,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xi8, #DCSR>) {
+ inits(%C: tensor<4x4xi8, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: i8) :
%result = sparse_tensor.binary %a, %b : f64, f64 to i8
overlap={
@@ -346,7 +346,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
@@ -63,7 +63,7 @@
%argb: tensor<10xf32>) -> tensor<10xf32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xf32>) {
+ inits(%argb: tensor<10xf32>) {
^bb(%a: i32, %x : f32):
%cst = arith.sitofp %a : i32 to f32
linalg.yield %cst : f32
@@ -74,7 +74,7 @@
%argb: tensor<10xf32>) -> tensor<10xf32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xf32>) {
+ inits(%argb: tensor<10xf32>) {
^bb(%a: i32, %x : f32):
%cst = arith.uitofp %a : i32 to f32
linalg.yield %cst : f32
@@ -85,7 +85,7 @@
%argb: tensor<10xi32>) -> tensor<10xi32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf32, #SV>)
- outs(%argb: tensor<10xi32>) {
+ inits(%argb: tensor<10xi32>) {
^bb(%a: f32, %x : i32):
%cst = arith.fptosi %a : f32 to i32
linalg.yield %cst : i32
@@ -96,7 +96,7 @@
%argb: tensor<10xi32>) -> tensor<10xi32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf64, #SV>)
- outs(%argb: tensor<10xi32>) {
+ inits(%argb: tensor<10xi32>) {
^bb(%a: f64, %x : i32):
%cst = arith.fptoui %a : f64 to i32
linalg.yield %cst : i32
@@ -107,7 +107,7 @@
%argb: tensor<10xf64>) -> tensor<10xf64> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf32, #SV>)
- outs(%argb: tensor<10xf64>) {
+ inits(%argb: tensor<10xf64>) {
^bb(%a: f32, %x : f64):
%cst = arith.extf %a : f32 to f64
linalg.yield %cst : f64
@@ -118,7 +118,7 @@
%argb: tensor<10xf32>) -> tensor<10xf32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf64, #SV>)
- outs(%argb: tensor<10xf32>) {
+ inits(%argb: tensor<10xf32>) {
^bb(%a: f64, %x : f32):
%cst = arith.truncf %a : f64 to f32
linalg.yield %cst : f32
@@ -129,7 +129,7 @@
%argb: tensor<10xi64>) -> tensor<10xi64> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xi64>) {
+ inits(%argb: tensor<10xi64>) {
^bb(%a: i32, %x : i64):
%cst = arith.extsi %a : i32 to i64
linalg.yield %cst : i64
@@ -140,7 +140,7 @@
%argb: tensor<10xi64>) -> tensor<10xi64> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xi64>) {
+ inits(%argb: tensor<10xi64>) {
^bb(%a: i32, %x : i64):
%cst = arith.extui %a : i32 to i64
linalg.yield %cst : i64
@@ -151,7 +151,7 @@
%argb: tensor<10xi8>) -> tensor<10xi8> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xi8>) {
+ inits(%argb: tensor<10xi8>) {
^bb(%a: i32, %x : i8):
%cst = arith.trunci %a : i32 to i8
linalg.yield %cst : i8
@@ -162,7 +162,7 @@
%argb: tensor<10xi32>) -> tensor<10xi32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf32, #SV>)
- outs(%argb: tensor<10xi32>) {
+ inits(%argb: tensor<10xi32>) {
^bb(%a: f32, %x : i32):
%cst = arith.bitcast %a : f32 to i32
linalg.yield %cst : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir
@@ -48,7 +48,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.add %a, %b : complex
linalg.yield %1 : complex
@@ -65,7 +65,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.mul %a, %b : complex
linalg.yield %1 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir
@@ -48,7 +48,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.add %a, %b : complex
linalg.yield %1 : complex
@@ -65,7 +65,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.mul %a, %b : complex
linalg.yield %1 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir
@@ -57,7 +57,7 @@
%0 = linalg.generic #trait_op2
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.neg %b : complex
%2 = complex.sub %a, %1 : complex
@@ -73,7 +73,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.sin %a : complex
linalg.yield %1 : complex
@@ -88,7 +88,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.sqrt %a : complex
linalg.yield %1 : complex
@@ -103,7 +103,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.tanh %a : complex
linalg.yield %1 : complex
@@ -118,7 +118,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.log1p %a : complex
%2 = complex.expm1 %1 : complex
@@ -135,7 +135,7 @@
%c = complex.constant [2.0 : f64, 0.0 : f64] : complex
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.div %a, %c : complex
linalg.yield %1 : complex
@@ -150,7 +150,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: complex, %x: f64):
%1 = complex.abs %a : complex
linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
@@ -43,7 +43,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
@@ -42,7 +42,7 @@
// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
@@ -42,7 +42,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
@@ -42,7 +42,7 @@
// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4, %s5) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
@@ -52,11 +52,11 @@
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.fill ins(%zero : f32)
- outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
+ inits(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<8x8xf32, #CSR>, tensor<8x8xf32, #SortedCOO>)
- outs(%init: tensor<8x8xf32>) {
+ inits(%init: tensor<8x8xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
@@ -71,11 +71,11 @@
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.fill ins(%zero : f32)
- outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
+ inits(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>, tensor<8x8xf32, #SortedCOO>)
- outs(%init: tensor<8x8xf32>) {
+ inits(%init: tensor<8x8xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
@@ -90,11 +90,11 @@
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.fill ins(%zero : f32)
- outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
+ inits(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<8x8xf32>, tensor<8x8xf32, #SortedCOO>)
- outs(%init: tensor<8x8xf32>) {
+ inits(%init: tensor<8x8xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
@@ -38,7 +38,7 @@
%x: tensor) -> tensor {
%dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>)
- outs(%x: tensor) -> tensor
+ inits(%x: tensor) -> tensor
return %dot : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -43,7 +43,7 @@
%C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
%D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
- outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+ inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
return %D: tensor<8x4xf64, #CSC>
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
@@ -63,7 +63,7 @@
-> tensor<7x3xf64> {
%0 = linalg.generic #trait_flatten
ins(%arga: tensor<7x3x3x3x3x3x5x3xf64, #SparseTensor>)
- outs(%argx: tensor<7x3xf64>) {
+ inits(%argx: tensor<7x3xf64>) {
^bb(%a: f64, %x: f64):
%0 = arith.addf %x, %a : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
@@ -66,7 +66,7 @@
%init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64, #SparseVector>) {
+ inits(%init: tensor<8xi64, #SparseVector>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -84,7 +84,7 @@
%init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64, #SparseVector>) {
+ inits(%init: tensor<8xi64, #SparseVector>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -102,7 +102,7 @@
%init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix>
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%init: tensor<3x4xi64, #SparseMatrix>) {
+ inits(%init: tensor<3x4xi64, #SparseMatrix>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -123,7 +123,7 @@
%init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix>
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%init: tensor<3x4xi64, #SparseMatrix>) {
+ inits(%init: tensor<3x4xi64, #SparseMatrix>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -141,7 +141,7 @@
%0 = bufferization.alloc_tensor() : tensor<2x3xf32, #SparseMatrix>
%1 = linalg.generic #trait_2d
ins(%arg0 : tensor<2x3xf32, #SparseMatrix>)
- outs(%0 : tensor<2x3xf32, #SparseMatrix>) {
+ inits(%0 : tensor<2x3xf32, #SparseMatrix>) {
^bb0(%arg1: f32, %arg2: f32):
%2 = linalg.index 0 : index
%3 = arith.index_cast %2 : index to i64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
@@ -64,7 +64,7 @@
%out: tensor<8xi64>) -> tensor<8xi64> {
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%out: tensor<8xi64>) {
+ inits(%out: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -81,7 +81,7 @@
%out: tensor<8xi64>) -> tensor<8xi64> {
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%out: tensor<8xi64>) {
+ inits(%out: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -98,7 +98,7 @@
%out: tensor<3x4xi64>) -> tensor<3x4xi64> {
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%out: tensor<3x4xi64>) {
+ inits(%out: tensor<3x4xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -118,7 +118,7 @@
%out: tensor<3x4xi64>) -> tensor<3x4xi64> {
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%out: tensor<3x4xi64>) {
+ inits(%out: tensor<3x4xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -57,7 +57,7 @@
%C: tensor<4x4xf64>) -> tensor<4x4xf64> {
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64>, tensor<8x4xf64>)
- outs(%C: tensor<4x4xf64>) -> tensor<4x4xf64>
+ inits(%C: tensor<4x4xf64>) -> tensor<4x4xf64>
return %D: tensor<4x4xf64>
}
@@ -69,7 +69,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -81,7 +81,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+ inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
return %D: tensor<4x4xf64, #DCSR>
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir
@@ -61,7 +61,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_dyn>, tensor<4x4xf64, #DCSR_SLICE_dyn>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -73,7 +73,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_1>, tensor<4x4xf64, #DCSR_SLICE_1>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -85,7 +85,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR_SLICE>, tensor<8x4xf64, #CSR>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -97,7 +97,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR_SLICE>, tensor<8x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+ inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
return %D: tensor<4x4xf64, #DCSR>
}
@@ -215,7 +215,7 @@
%d = bufferization.alloc_tensor() copy(%zero) : tensor<4x4xf64>
%r = linalg.matmul ins(%ds2, %ds1: tensor<4x4xf64>, tensor<4x4xf64>)
- outs(%d: tensor<4x4xf64>) -> tensor<4x4xf64>
+ inits(%d: tensor<4x4xf64>) -> tensor<4x4xf64>
%du = tensor.cast %r : tensor<4x4xf64> to tensor<*xf64>
call @printMemrefF64(%du) : (tensor<*xf64>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
@@ -69,7 +69,7 @@
%xm = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_scale
ins(%arga: tensor)
- outs(%xm: tensor) {
+ inits(%xm: tensor) {
^bb(%a: f64, %x: f64):
%1 = arith.mulf %a, %s : f64
linalg.yield %1 : f64
@@ -81,7 +81,7 @@
func.func @matrix_scale_inplace(%argx: tensor) -> tensor {
%s = arith.constant 2.0 : f64
%0 = linalg.generic #trait_scale_inpl
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%x: f64):
%1 = arith.mulf %x, %s : f64
linalg.yield %1 : f64
@@ -99,7 +99,7 @@
%xv = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = arith.addf %a, %b : f64
linalg.yield %1 : f64
@@ -117,7 +117,7 @@
%xv = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = arith.mulf %a, %b : f64
linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
@@ -69,7 +69,7 @@
-> tensor {
%0 = linalg.generic #matvec
ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %b: i32, %x: i32):
%0 = arith.muli %a, %b : i32
%1 = arith.addi %x, %0 : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -65,7 +65,7 @@ %0 = linalg.generic #mttkrp ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f64, %c: f64, %d: f64, %a: f64): %0 = arith.mulf %b, %c : f64 %1 = arith.mulf %d, %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir @@ -47,7 +47,7 @@ %argx = bufferization.alloc_tensor() : tensor<32x16xf32, #DCSR> %0 = linalg.generic #trait_mult_elt ins(%arga, %argb: tensor<32x16xf32, #DCSR>, tensor<32x16xf32, #DCSR>) - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%a: f32, %b: f32, %x: f32): %1 = arith.mulf %a, %b : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -56,7 +56,7 @@ %0 = linalg.generic #redsum ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -58,7 +58,7 @@ func.func @kernel_eltwise_mult(%argx: tensor) -> tensor { %0 = linalg.generic #eltwise_mult - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f64): %0 = arith.mulf %x, %x : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -41,7 +41,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32> + inits(%output : tensor<5x6xi32>) -> tensor<5x6xi32> return %0: tensor<5x6xi32> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir @@ -45,7 +45,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.re %a : complex linalg.yield %1 : f32 @@ -60,7 +60,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.im %a : complex linalg.yield %1 : f32 diff --git 
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir
@@ -59,7 +59,7 @@
   %xm = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_matmul
     ins(%arga, %argb: tensor, tensor)
-    outs(%xm: tensor) {
+    inits(%xm: tensor) {
     ^bb(%a: f64, %b: f64, %output: f64):
       %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
         overlap = {
@@ -90,7 +90,7 @@
   %xm = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_matmul
     ins(%arga, %argb: tensor, tensor)
-    outs(%xm: tensor) {
+    inits(%xm: tensor) {
     ^bb(%a: f64, %b: f64, %output: f64):
       %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
         overlap = {
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir
@@ -47,7 +47,7 @@
   %xv = bufferization.alloc_tensor(%d0): tensor
   %0 = linalg.generic #trait_mat_reduce_rowwise
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 {
         ^bb0(%x: f64, %y: f64):
@@ -66,7 +66,7 @@
   %xv = bufferization.alloc_tensor(%d0): tensor
   %0 = linalg.generic #trait_mat_reduce_rowwise
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 {
         ^bb0(%x: f64, %y: f64):
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
@@ -46,7 +46,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.addi %x, %a : i32
       linalg.yield %0 : i32
@@ -58,7 +58,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f32, %x: f32):
       %0 = arith.addf %x, %a : f32
       linalg.yield %0 : f32
@@ -70,7 +70,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #DV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.andi %x, %a : i32
       linalg.yield %0 : i32
@@ -82,7 +82,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.ori %x, %a : i32
       linalg.yield %0 : i32
@@ -94,7 +94,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.xori %x, %a : i32
       linalg.yield %0 : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
@@ -38,7 +38,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #DV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.muli %x, %a : i32
       linalg.yield %0 : i32
@@ -50,7 +50,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f32, %x: f32):
       %0 = arith.mulf %x, %a : f32
       linalg.yield %0 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
@@ -62,7 +62,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%s: f32, %a: f32, %b: f32, %x: f32):
       %0 = arith.mulf %a, %b : f32
       %1 = arith.mulf %s, %0 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
@@ -72,7 +72,7 @@
   %2 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1: tensor<8x8xf64>) {
+    inits(%1: tensor<8x8xf64>) {
     ^bb(%s: f64, %a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.mulf %s, %p : f64
@@ -93,7 +93,7 @@
   %1 = arith.constant dense<0.0> : tensor<8x8xf64>
   %2 = linalg.generic #trait_matmul
     ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1 : tensor<8x8xf64>) {
+    inits(%1 : tensor<8x8xf64>) {
     ^bb0(%a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.addf %x, %p : f64
@@ -102,7 +102,7 @@
   // Sample the result with elements-wise multiplication with sparse matrix.
   %3 = linalg.generic #trait_scale
     ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>)
-    outs(%1 : tensor<8x8xf64>) {
+    inits(%1 : tensor<8x8xf64>) {
     ^bb0(%t: f64, %s: f64, %x: f64):
       %r = arith.mulf %t, %s : f64
       linalg.yield %r : f64
@@ -121,7 +121,7 @@
   %2 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1: tensor<8x8xf64, #SM>) {
+    inits(%1: tensor<8x8xf64, #SM>) {
     ^bb(%s: f64, %a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.mulf %s, %p : f64
@@ -143,7 +143,7 @@
   %1 = arith.constant dense<0.0> : tensor<8x8xf64>
   %2 = linalg.generic #trait_matmul
     ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1 : tensor<8x8xf64>) {
+    inits(%1 : tensor<8x8xf64>) {
     ^bb0(%a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.addf %x, %p : f64
@@ -153,7 +153,7 @@
   %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM>
   %4 = linalg.generic #trait_scale
     ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>)
-    outs(%3 : tensor<8x8xf64, #SM>) {
+    inits(%3 : tensor<8x8xf64, #SM>) {
     ^bb0(%t: f64, %s: f64, %x: f64):
       %r = arith.mulf %t, %s : f64
       linalg.yield %r : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
@@ -44,7 +44,7 @@
 func.func @sparse_scale(%argx: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> {
   %c = arith.constant 2.0 : f32
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor<8x8xf32, #CSR>) {
+    inits(%argx: tensor<8x8xf32, #CSR>) {
     ^bb(%x: f32):
       %1 = arith.mulf %x, %c : f32
       linalg.yield %1 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir
@@ -36,7 +36,7 @@
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>)
-    outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
+    inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
     ^bb0(%arg1: f64, %arg2: f64, %arg3: f64):
       %4 = arith.subf %arg1, %arg2 : f64
       linalg.yield %4 : f64
@@ -50,7 +50,7 @@
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>)
-    outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
+    inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
     ^bb0(%arg1: f64, %arg2: f64, %arg3: f64):
       %4 = arith.addf %arg1, %arg2 : f64
       linalg.yield %4 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
@@ -56,7 +56,7 @@
   %xv = bufferization.alloc_tensor(%d0): tensor
   %0 = linalg.generic #trait_vec_select
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %1 = sparse_tensor.select %a : f64 {
         ^bb0(%x: f64):
@@ -76,7 +76,7 @@
   %xv = bufferization.alloc_tensor(%d0, %d1): tensor
   %0 = linalg.generic #trait_mat_select
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %row = linalg.index 0 : index
       %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
@@ -51,7 +51,7 @@
   %xin = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arg0: tensor)
-    outs(%xin: tensor) {
+    inits(%xin: tensor) {
     ^bb0(%a: f64, %x: f64) :
       %result = sparse_tensor.unary %a : f64 to f64
         present={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
@@ -69,7 +69,7 @@
     -> tensor {
   %c = arith.constant 2.0 : f64
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%x: f64):
       %1 = arith.mulf %x, %c : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
@@ -59,7 +59,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #spmm
     ins(%arga, %argb: tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %0 = arith.mulf %a, %b : f64
       %1 = arith.addf %x, %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
@@ -59,7 +59,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f64, %x: f64):
       %0 = arith.addf %x, %a : f64
       linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
@@ -40,7 +40,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: bf16, %x: bf16):
       %0 = arith.addf %x, %a : bf16
       linalg.yield %0 : bf16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
@@ -59,7 +59,7 @@
     %argx: tensor>) -> tensor> {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor, #SparseMatrix>)
-    outs(%argx: tensor>) {
+    inits(%argx: tensor>) {
     ^bb(%a: complex, %x: complex):
       %0 = complex.add %x, %a : complex
       linalg.yield %0 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
@@ -49,7 +49,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f16, %x: f16):
       %0 = arith.addf %x, %a : f16
       linalg.yield %0 : f16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
@@ -44,7 +44,7 @@
 func.func @sparse_tanh(%vec: tensor) -> tensor {
   %0 = linalg.generic #trait_op
-    outs(%vec: tensor) {
+    inits(%vec: tensor) {
     ^bb(%x: f64):
       %1 = math.tanh %x : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
@@ -54,7 +54,7 @@
   %xt = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
   %0 = linalg.generic #trait_mul
     ins(%arga, %argb: tensor, tensor)
-    outs(%xt: tensor) {
+    inits(%xt: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
@@ -54,7 +54,7 @@
   %xm = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
   %0 = linalg.generic #trait_scale
     ins(%arga: tensor)
-    outs(%xm: tensor) {
+    inits(%xm: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = arith.mulf %a, %s : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
@@ -60,7 +60,7 @@
   %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
   %0 = linalg.generic #transpose_trait
     ins(%t: tensor<3x4xf64, #DCSC>)
-    outs(%i: tensor<4x3xf64, #DCSR>) {
+    inits(%i: tensor<4x3xf64, #DCSR>) {
     ^bb(%a: f64, %x: f64):
       linalg.yield %a : f64
   } -> tensor<4x3xf64, #DCSR>
@@ -79,7 +79,7 @@
   %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
   %0 = linalg.generic #transpose_trait
     ins(%arga: tensor<3x4xf64, #DCSR>)
-    outs(%i: tensor<4x3xf64, #DCSR>) {
+    inits(%i: tensor<4x3xf64, #DCSR>) {
     ^bb(%a: f64, %x: f64):
       linalg.yield %a : f64
   } -> tensor<4x3xf64, #DCSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir
@@ -42,7 +42,7 @@
      affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
     ins(%arga : tensor<10x5xf32, #SortedCOO>)
-    outs(%0 : tensor<5x10xf32, #SortedCOO>) {
+    inits(%0 : tensor<5x10xf32, #SortedCOO>) {
     ^bb0(%in: f32, %out: f32):
       linalg.yield %in : f32
   } -> tensor<5x10xf32, #SortedCOO>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
@@ -57,7 +57,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_vec_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: i32):
       %1 = sparse_tensor.unary %a : f64 to i32
         present={}
@@ -77,7 +77,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_vec_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = sparse_tensor.unary %a : f64 to f64
         present={
@@ -100,7 +100,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_vec_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %idx = linalg.index 0 : index
       %1 = sparse_tensor.unary %a : f64 to f64
@@ -128,7 +128,7 @@
   %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_mat_scale
     ins(%argx: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = sparse_tensor.unary %a: f64 to f64
         present={
@@ -155,7 +155,7 @@
   %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_mat_scale
     ins(%argx: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %row = linalg.index 0 : index
       %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
@@ -75,7 +75,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = arith.mulf %a, %s : f64
       linalg.yield %1 : f64
@@ -87,7 +87,7 @@
 func.func @vector_scale_inplace(%argx: tensor) -> tensor {
   %s = arith.constant 2.0 : f64
   %0 = linalg.generic #trait_scale_inpl
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%x: f64):
       %1 = arith.mulf %x, %s : f64
       linalg.yield %1 : f64
@@ -103,7 +103,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arga, %argb: tensor, tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.addf %a, %b : f64
       linalg.yield %1 : f64
@@ -119,7 +119,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arga, %argb: tensor, tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       linalg.yield %1 : f64
@@ -135,7 +135,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arga, %argb: tensor, tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       linalg.yield %1 : f64
@@ -149,7 +149,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_dot
     ins(%arga, %argb: tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       %2 = arith.addf %x, %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
@@ -38,7 +38,7 @@
   %argx = bufferization.alloc_tensor() : tensor<3x4xf64, #DCSR>
   %0 = linalg.generic #trait_add_elt
     ins(%arga, %argb: tensor<3x4xf64, #DCSR>, tensor<3x4xf64, #DCSR>)
-    outs(%argx: tensor<3x4xf64, #DCSR>) {
+    inits(%argx: tensor<3x4xf64, #DCSR>) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.addf %a, %b : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
@@ -3,7 +3,7 @@
 func.func @gemm(%arg0 : memref, %arg1 : memref,
     %arg2 : memref) {
   linalg.matmul ins(%arg0, %arg1 : memref, memref)
-      outs(%arg2 : memref)
+      inits(%arg2 : memref)
   return
 }
 // CHECK-LABEL: func @gemm
@@ -34,7 +34,7 @@
      affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1, d0)>],
      iterator_types = ["parallel", "parallel"]}
     ins(%arg0, %arg1, %arg2 : memref<200x300xi32>, memref<300xi16>, memref<200xi8>)
-    outs(%arg3 : memref<300x200xi64>) {
+    inits(%arg3 : memref<300x200xi64>) {
     ^bb0(%b0 : i32, %b1 : i16, %b2 : i8, %b3 : i64):
       %0 = linalg.index 0 : index
       %1 = arith.index_cast %0 : index to i16
@@ -84,7 +84,7 @@
      strides = dense<[1, 2]> : tensor<2xi64>,
      dilations = dense<[3, 4]> : tensor<2xi64>}
     ins(%arg0, %arg1 : memref, memref)
-    outs(%arg2 : memref)
+    inits(%arg2 : memref)
   return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1 + d4 * 3)>
@@ -128,7 +128,7 @@
      strides = dense<[1, 2]> : tensor<2xi64>,
      dilations = dense<[3, 4]> : tensor<2xi64>}
     ins(%arg0, %arg1 : memref, memref)
-    outs(%arg2 : memref)
+    inits(%arg2 : memref)
   return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1 + d4 * 3)>
@@ -165,7 +165,7 @@
 func.func @map(%lhs: memref<64xf32>,
     %rhs: memref<64xf32>, %out: memref<64xf32>) {
   linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>)
-      outs(%out : memref<64xf32>)
+      inits(%out : memref<64xf32>)
      (%in: f32, %in_0: f32) {
        %0 = arith.addf %in, %in_0 : f32
        linalg.yield %0 : f32
@@ -192,7 +192,7 @@
 func.func @transpose(%arg0: memref<16x32x64xf32>,
     %arg1: memref<32x64x16xf32>) {
   linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
-      outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+      inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
   return
 }
 // CHECK-LABEL: func.func @transpose(
@@ -216,7 +216,7 @@
 func.func @reduce(%arg0: memref<16x32x64xf32>,
     %arg1: memref<16x64xf32>) {
   linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
-      outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+      inits(%arg1 : memref<16x64xf32>) dimensions = [1]
      (%in: f32, %init: f32) {
        %0 = arith.addf %in, %init : f32
        linalg.yield %0 : f32
@@ -247,7 +247,7 @@
     %init: memref<8x16x32xf32>) {
   linalg.broadcast ins(%input:memref<8x32xf32>)
-      outs(%init:memref<8x16x32xf32>)
+      inits(%init:memref<8x16x32xf32>)
      dimensions = [1]
   func.return
 }
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -7,10 +7,10 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
   %gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%fill : tensor) -> tensor
+     inits(%fill : tensor) -> tensor
   return %gemm : tensor
 }
 // CHECK: func.func @gemm_fill_fusion(
@@ -25,10 +25,10 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -42,15 +42,15 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
   %gemm = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%fill : tensor) -> tensor
+      inits(%fill : tensor) -> tensor
   %generic = linalg.generic {
      __internal_linalg_transform__ = "fusion",
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%gemm, %arg2 : tensor, tensor) outs(%init : tensor) {
+      ins(%gemm, %arg2 : tensor, tensor) inits(%init : tensor) {
     ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
       %add = arith.addf %b0, %b1 : f32
       linalg.yield %add : f32
@@ -70,15 +70,15 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[BIAS_TILE:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]]]
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]], %[[BIAS_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -91,14 +91,14 @@
   %d0 = tensor.dim %lhs0, %c0 : tensor
   %d1 = tensor.dim %rhs0, %c1 : tensor
   %init0 = tensor.empty(%d0, %d1) : tensor
-  %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill0 = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
   %gemm0 = linalg.matmul
-      ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+      ins(%lhs0, %rhs0 : tensor, tensor) inits(%fill0 : tensor) -> tensor
   %d2 = tensor.dim %rhs1, %c1 : tensor
   %init1 = tensor.empty(%d0, %d2) : tensor
-  %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+  %fill1 = linalg.fill ins(%cst : f32) inits(%init1 : tensor) -> tensor
   %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_fusion"}
-      ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+      ins(%gemm0, %rhs1 : tensor, tensor) inits(%fill1 : tensor) -> tensor
   return %gemm1 : tensor
 }
 // CHECK: func.func @gemm_gemm_fusion(
@@ -118,17 +118,17 @@
 // CHECK-DAG: %[[RHS0_TILE:.+]] = tensor.extract_slice %[[RHS0]][0, 0]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV]], 0]
 // CHECK: %[[FILL0_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM0_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS0_TILE]], %[[RHS0_TILE]] :
-// CHECK-SAME: outs(%[[FILL0_TILE]] :
+// CHECK-SAME: inits(%[[FILL0_TILE]] :
 // CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
 // CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG]][%[[IV]], 0]
 // CHECK: %[[FILL1_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT1_TILE]] :
 // CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
-// CHECK-SAME: outs(%[[FILL1_TILE]] :
+// CHECK-SAME: inits(%[[FILL1_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERT]]
@@ -141,16 +141,16 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init0 = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
   %gemm = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%fill : tensor) -> tensor
+     inits(%fill : tensor) -> tensor
   %init1 = tensor.empty(%d1, %d0) : tensor
   %transpose = linalg.generic {
      __internal_linalg_transform__ = "fusion",
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%gemm : tensor) outs(%init1 : tensor) {
+      ins(%gemm : tensor) inits(%init1 : tensor) {
     ^bb0(%b0 : f32, %b1 : f32):
       linalg.yield %b0 : f32
   } -> tensor
@@ -173,14 +173,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -193,15 +193,15 @@
   %d1 = tensor.dim %arg1, %c1 : tensor
   %cst = arith.constant 0.0 : f32
   %0 = tensor.empty(%d0, %d1) : tensor
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
   %2 = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%1 : tensor) -> tensor
+     inits(%1 : tensor) -> tensor
   %3 = linalg.generic {
      __internal_linalg_transform__ = "gemm_interchange_fusion",
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%2 : tensor) outs(%0 : tensor) {
+      ins(%2 : tensor) inits(%0 : tensor) {
     ^bb0(%b0 : f32, %b1 : f32):
       %4 = arith.addf %b0, %b0 : f32
       linalg.yield %4 : f32
@@ -220,14 +220,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INIT_TILE_2:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE_2]] :
+// CHECK-SAME: inits(%[[INIT_TILE_2]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -240,7 +240,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor
   %1 = tensor.dim %arg2, %c1 : tensor
   %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      inits(%arg2 : tensor) -> tensor
   %3 = tensor.dim %2, %c0 : tensor
   %4 = tensor.dim %2, %c1 : tensor
   %5 = tensor.empty(%3, %4) : tensor
@@ -251,7 +251,7 @@
      iterator_types = ["parallel", "parallel"],
      __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
     ins(%2, %2 : tensor, tensor)
-    outs(%5 : tensor) {
+    inits(%5 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
       %7 = arith.addf %arg3, %arg4 : f32
       linalg.yield %7 : f32
@@ -273,11 +273,11 @@
 // CHECK-DAG: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[MATMUL:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] :
-// CHECK-SAME: outs(%[[ST_ARG2]] :
+// CHECK-SAME: inits(%[[ST_ARG2]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[MATMUL]], %[[MATMUL]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -293,7 +293,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor
   %1 = tensor.dim %arg2, %c1 : tensor
   %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      inits(%arg2 : tensor) -> tensor
   %3 = tensor.dim %2, %c0 : tensor
   %4 = tensor.dim %2, %c1 : tensor
   %5 = tensor.empty(%3, %4) : tensor
@@ -304,7 +304,7 @@
      iterator_types = ["parallel", "parallel"],
      __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
     ins(%2, %2 : tensor, tensor)
-    outs(%5 : tensor) {
+    inits(%5 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
       %7 = arith.addf %arg3, %arg4 : f32
       linalg.yield %7 : f32
@@ -325,17 +325,17 @@
 // CHECK: %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]]
 // CHECK-SAME: : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_ARG2]] : tensor)
+// CHECK-SAME: inits(%[[ST_ARG2]] : tensor)
 // CHECK-DAG: %[[STR_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]
 // CHECK-DAG: %[[STR_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[STR_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[RHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[STR_ARG0]], %[[STR_ARG1]] :
-// CHECK-SAME: outs(%[[STR_ARG2]] :
+// CHECK-SAME: inits(%[[STR_ARG2]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -348,13 +348,13 @@
     %arg2: tensor, %arg3: tensor, %arg4: tensor,
     %arg5: tensor, %arg6: tensor) -> tensor {
   %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
+      inits(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
   %1 = linalg.matmul ins(%0, %arg3 : tensor, tensor)
-      outs(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
+      inits(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
   %2 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion"}
      ins(%1, %arg5 : tensor, tensor)
-     outs(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
+     inits(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
   return %2 : tensor
 }
@@ -383,16 +383,16 @@
 // CHECK-DAG: %[[SLICE_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, 0] [%[[N0]], %[[N1]]]
 // CHECK-DAG: %[[SLICE_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV]], 0] [%[[TILE_M]], %[[N1]]]
 // CHECK-DAG: %[[TILE_GEMM1:.+]] = linalg.matmul ins(%[[SLICE_ARG0]], %[[SLICE_ARG1]] :
-// CHECK-SAME: outs(%[[SLICE_ARG2]] :
+// CHECK-SAME: inits(%[[SLICE_ARG2]] :
 // CHECK-DAG: %[[SLICE_ARG3:.+]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[N1]], %[[N2]]]
 // CHECK-DAG: %[[SLICE_ARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV]], 0] [%[[TILE_M]], %[[N2]]]
 // CHECK-DAG: %[[TILE_GEMM2:.+]] = linalg.matmul ins(%[[TILE_GEMM1]], %[[SLICE_ARG3]] :
-// CHECK-SAME: outs(%[[SLICE_ARG4]] :
+// CHECK-SAME: inits(%[[SLICE_ARG4]] :
 // CHECK-DAG: %[[SLICE_ARG5:.+]] = tensor.extract_slice %[[ARG5]][0, 0] [%[[N2]], %[[N3]]]
 // CHECK-DAG: %[[SLICE_ARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK-DAG: %[[TILE_GEMM3:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[TILE_GEMM2]], %[[SLICE_ARG5]] :
-// CHECK-SAME: outs(%[[SLICE_ARG6]] :
+// CHECK-SAME: inits(%[[SLICE_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[TILE_GEMM3]] into %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -402,22 +402,22 @@
   %cst = arith.constant 0.000000e+00 : f32
   %cst_0 = arith.constant 0xFF800000 : f32
   %0 = tensor.empty() : tensor<30xf32>
-  %1 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<30xf32>) -> tensor<30xf32>
+  %1 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<30xf32>) -> tensor<30xf32>
   %2 = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
      iterator_types = ["parallel", "reduction"]}
-      ins(%arg0 : tensor<30x3xf32>) outs(%1 : tensor<30xf32>) {
+      ins(%arg0 : tensor<30x3xf32>) inits(%1 : tensor<30xf32>) {
     ^bb0(%arg1: f32, %arg2: f32):
       %8 = arith.maxf %arg2, %arg1 : f32
       linalg.yield %8 : f32
   } -> tensor<30xf32>
   %3 = tensor.empty() : tensor<30x3xf32>
-  %4 = linalg.fill ins(%cst : f32) outs(%0 : tensor<30xf32>) -> tensor<30xf32>
+  %4 = linalg.fill ins(%cst : f32) inits(%0 : tensor<30xf32>) -> tensor<30xf32>
   %5:2 = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "reduction"]}
-      ins(%arg0, %2 : tensor<30x3xf32>, tensor<30xf32>) outs(%4, %3 : tensor<30xf32>, tensor<30x3xf32>) {
+      ins(%arg0, %2 : tensor<30x3xf32>, tensor<30xf32>) inits(%4, %3 : tensor<30xf32>, tensor<30x3xf32>) {
     ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
       %8 = arith.subf %arg1, %arg2 : f32
       %9 = math.exp %8 : f32
@@ -429,7 +429,7 @@
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%5#1, %5#0 : tensor<30x3xf32>, tensor<30xf32>) outs(%3 : tensor<30x3xf32>) {
+      ins(%5#1, %5#0 : tensor<30x3xf32>, tensor<30xf32>) inits(%3 : tensor<30x3xf32>) {
    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
      %8 = arith.divf %arg1, %arg2 : f32
      linalg.yield %8 : f32
@@ -444,20 +444,20 @@
 // CHECK-DAG: %[[ARG0_SLICE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV]], 0]
 // CHECK-DAG: %[[INIT0_SLICE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV]]]
 // CHECK: %[[FILL0:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_SLICE]] :
+// CHECK-SAME: inits(%[[INIT0_SLICE]] :
 // CHECK: %[[GENERIC0:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[ARG0_SLICE]] :
-// CHECK-SAME: outs(%[[FILL0]] :
+// CHECK-SAME: inits(%[[FILL0]] :
 // CHECK: %[[FILL1:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_SLICE]] :
+// CHECK-SAME: inits(%[[INIT0_SLICE]] :
 // CHECK: %[[INIT1_SLICE:.+]] = tensor.extract_slice %[[INIT1]][%[[IV]], 0]
 // CHECK: %[[GENERIC1:.+]]:2 = linalg.generic
 // CHECK-SAME: ins(%[[ARG0_SLICE]], %[[GENERIC0]] :
-// CHECK-SAME: outs(%[[FILL1]], %[[INIT1_SLICE]] :
+// CHECK-SAME: inits(%[[FILL1]], %[[INIT1_SLICE]] :
 // CHECK: %[[ITERARG0_SLICE:.+]] = tensor.extract_slice %[[ITERARG0]][%[[IV]], 0]
 // CHECK: %[[GENERIC2:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GENERIC1]]#1, %[[GENERIC1]]#0 :
-// CHECK-SAME: outs(%[[ITERARG0_SLICE]] :
+// CHECK-SAME: inits(%[[ITERARG0_SLICE]] :
 // CHECK-DAG: %[[INSERTSLICE:.+]] = tensor.insert_slice %[[GENERIC2]] into %[[ITERARG0]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERTSLICE]]
 // CHECK: return %[[RESULT]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir
@@ -8,13 +8,13 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %lhs0, %c0 : tensor
   %d1 = tensor.dim %rhs0, %c1 : tensor
-  %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill0 = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
   %gemm0 = linalg.matmul
-      ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+      ins(%lhs0, %rhs0 : tensor, tensor) inits(%fill0 : tensor) -> tensor
   %d2 = tensor.dim %rhs1, %c1 : tensor
-  %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+  %fill1 = linalg.fill ins(%cst : f32) inits(%init1 : tensor) -> tensor
   %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion_and_yield"}
-      ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+      ins(%gemm0, %rhs1 : tensor, tensor) inits(%fill1 : tensor) -> tensor
   return %gemm0, %gemm1 : tensor, tensor
 }
 // CHECK: func.func @gemm_gemm_fusion_yield_both(
@@ -31,17 +31,17 @@
 // CHECK-DAG: %[[RHS0_TILE:.+]] = tensor.extract_slice %[[RHS0]][0, 0]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV]], 0]
 // CHECK: %[[FILL0_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM0_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS0_TILE]], %[[RHS0_TILE]] :
-// CHECK-SAME: outs(%[[FILL0_TILE]] :
+// CHECK-SAME: inits(%[[FILL0_TILE]] :
 // CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
 // CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG0]][%[[IV]], 0]
 // CHECK: %[[FILL1_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT1_TILE]] :
 // CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
-// CHECK-SAME: outs(%[[FILL1_TILE]] :
+// CHECK-SAME: inits(%[[FILL1_TILE]] :
 // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG0]][%[[IV]], 0]
 // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[GEMM0_TILE]] into %[[ITERARG1]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERT0]], %[[INSERT1]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -4,7 +4,7 @@
     %arg2 : tensor) -> tensor {
   %0 = linalg.matmul {__internal_linalg_transform__ = "simple_gemm"}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor
+     inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -34,7 +34,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT1]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
@@ -47,7 +47,7 @@
     %arg2 : memref) {
   linalg.matmul {__internal_linalg_transform__ = "simple_gemm_memref"}
      ins(%arg0, %arg1 : memref, memref)
-     outs(%arg2 : memref)
+     inits(%arg2 : memref)
   return
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -79,7 +79,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[OUT_TILE]] :
+// CHECK-SAME: inits(%[[OUT_TILE]] :
 // -----
@@ -94,7 +94,7 @@
      iterator_types = ["parallel", "parallel", "parallel"]}
     {__internal_linalg_transform__ = "parallel_generic_transpose"}
     ins(%arg0 : tensor<128x200x300xf32>)
-    outs(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
+    inits(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
      linalg.yield %b0, %b0 : f32, f32
   } -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>)
@@ -123,7 +123,7 @@
 // CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [20, %[[TS_Y]], 200] [1, 1, 1]
 // CHECK: %[[RESULT_TILE:.+]]:2 = linalg.generic
 // CHECK-SAME: ins(%[[ARG_TILE]] :
-// CHECK-SAME: outs(%[[INIT0_TILE]], %[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]], %[[INIT1_TILE]] :
 // CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[RESULT_TILE]]#0 into %[[ARG3]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], 20, 200] [1, 1, 1]
 // CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[RESULT_TILE]]#1 into %[[ARG4]]
@@ -141,7 +141,7 @@
      dilation = dense<[4, 5]> : tensor<2xi64>,
      __internal_linalg_transform__ = "simple_conv"}
     ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) -> tensor
+    inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -187,7 +187,7 @@
 // CHECK: %[[CONV_TILE:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME: dilation = dense<[4, 5]> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>
 // CHECK-SAME: ins(%[[INPUT_TILE]], %[[FILTER_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: tensor.insert_slice %[[CONV_TILE]] into %[[INIT2]]
 // CHECK-SAME: [0, 0, 0, 0] [%[[N]], %[[R]], %[[S]], %[[F]]]
@@ -207,7 +207,7 @@
      iterator_types = ["parallel", "parallel"]}
     {__internal_linalg_transform__ = "indexed_semantics"}
     ins(%arg0: tensor)
-    outs(%arg1: tensor) {
+    inits(%arg1: tensor) {
    ^bb0(%arg2: f32, %arg3: f32):
      // CHECK: %[[INDEX0:.+]] = linalg.index 0
      // CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
@@ -231,7 +231,7 @@
     %arg2 : tensor) -> tensor {
   %0 = linalg.matmul {__internal_linalg_transform__ = "gemm_interchange"}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor
+     inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
@@ -266,7 +266,7 @@
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT2]]
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2832,7 +2832,7 @@
   let assemblyFormat = [{
     attr-dict
    (`ins` `(` $inputs^ `:` type($inputs) `)`)?
-    `outs` `(` $outputs `:` type($outputs) `)`
+    `inits` `(` $outputs `:` type($outputs) `)`
    $region (`->` type($results)^)?
  }];
@@ -2894,7 +2894,7 @@
   let assemblyFormat = [{
     attr-dict
    (`ins` `(` $inputs^ `:` type($inputs) `)`)?
-    `outs` `(` $outputs `:` type($outputs) `)`
+    `inits` `(` $outputs `:` type($outputs) `)`
    $region (`->` type($results)^)?
  }];
diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -25,7 +25,7 @@
   %c4 = arith.constant 4.0 : f32
   %A = memref.alloc() : memref<4xf32>
-  linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+  linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
   // CHECK: [0, 0, 0, 0]
   %U = memref.cast %A : memref<4xf32> to memref<*xf32>
diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -7,14 +7,14 @@
   %cf1 = arith.constant 1.00000e+00 : f32
-  linalg.fill ins(%cf1 : f32) outs(%A : memref<16x16xf32>)
-  linalg.fill ins(%cf1 : f32) outs(%B : memref<16x16xf32>)
+  linalg.fill ins(%cf1 : f32) inits(%A : memref<16x16xf32>)
+  linalg.fill ins(%cf1 : f32) inits(%B : memref<16x16xf32>)
   %num_reps = arith.constant 5 : index
   %t_start = call @rtclock() : () -> f64
   affine.for %arg0 = 0 to %num_reps {
-    linalg.fill ins(%cf1 : f32) outs(%C : memref<16x16xf32>)
+    linalg.fill ins(%cf1 : f32) inits(%C : memref<16x16xf32>)
     func.call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
   }
   %t_end = call @rtclock() : () -> f64
diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
--- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
@@ -42,18 +42,18 @@
   %f10 = arith.constant 10.00000e+00 : f32
   %V = memref.cast %A : memref<10x3xf32, 0> to memref
-  linalg.fill ins(%f10 : f32) outs(%V : memref)
+  linalg.fill ins(%f10 : f32) inits(%V : memref)
   %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
   call @printMemrefF32(%U) : (memref<*xf32>) -> ()
   %V2 = memref.cast %U : memref<*xf32> to memref
-  linalg.fill ins(%f5 : f32) outs(%V2 : memref)
+  linalg.fill ins(%f5 : f32) inits(%V2 : memref)
   %U2 = memref.cast %V2 : memref to memref<*xf32>
   call @printMemrefF32(%U2) : (memref<*xf32>) -> ()
   %V3 = memref.cast %V2 : memref to memref<*xf32>
   %V4 = memref.cast %V3 : memref<*xf32> to memref
-  linalg.fill ins(%f2 : f32) outs(%V4 : memref)
+  linalg.fill ins(%f2 : f32) inits(%V4 : memref)
   %U3 = memref.cast %V2 : memref to memref<*xf32>
   call @printMemrefF32(%U3) : (memref<*xf32>) -> ()
@@ -79,7 +79,7 @@
 func.func @return_two_var_memref_caller() {
   %0 = memref.alloca() : memref<4x3xf32>
   %c0f32 = arith.constant 1.0 : f32
-  linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+  linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
   %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
   call @printMemrefF32(%1#0) : (memref<*xf32>) -> ()
   call @printMemrefF32(%1#1) : (memref<*xf32>) -> ()
@@ -94,7 +94,7 @@
 func.func @return_var_memref_caller() {
   %0 = memref.alloca() : memref<4x3xf32>
   %c0f32 = arith.constant 1.0 : f32
-  linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+  linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
   %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
   call @printMemrefF32(%1) : (memref<*xf32>) -> ()
   return
diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -19,7 +19,7 @@
   %f = arith.constant 2.00000e+00 : f32
   %A = memref.alloc() : memref<16xf32>
   %B = memref.cast %A: memref<16xf32> to memref
-  linalg.fill ins(%f : f32) outs(%B : memref)
+  linalg.fill ins(%f : f32) inits(%B : memref)
   %U = memref.cast %B : memref to memref<*xf32>
   call @printMemrefF32(%U): (memref<*xf32>) -> ()
   memref.dealloc %A : memref<16xf32>
@@ -33,7 +33,7 @@
   %f4 = arith.constant 4.00000e+00 : f32
   %A = memref.alloc() : memref<3x4x5xf32>
   %B = memref.cast %A: memref<3x4x5xf32> to memref
-  linalg.fill ins(%f : f32) outs(%B : memref)
+  linalg.fill ins(%f : f32) inits(%B : memref)
   %c2 = arith.constant 2 : index
   memref.store %f4, %B[%c2, %c2, %c2]: memref
diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir
--- a/mlir/test/mlir-opt/async.mlir
+++ b/mlir/test/mlir-opt/async.mlir
@@ -20,7 +20,7 @@
   %c4 = arith.constant 4.0 : f32
   %A = memref.alloc() : memref<4xf32>
-  linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+  linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
   %U = memref.cast %A : memref<4xf32> to memref<*xf32>
   call @printMemrefF32(%U): (memref<*xf32>) -> ()
diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
--- a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
@@ -56,7 +56,7 @@
   # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
   # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
   # CHECK-SAME: ins(%[[A]], %[[B]]
-  # CHECK-SAME: outs(%[[INITC]]
+  # CHECK-SAME: inits(%[[INITC]]
   @func.FuncOp.from_py_func(
       RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
   def test_matmul_mono(lhs, rhs):
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -21,7 +21,7 @@
   # CHECK-LABEL: func @fill_tensor
   # CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32>
   # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
-  # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
+  # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
   # CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
   @func.FuncOp.from_py_func(
       RankedTensorType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -32,7 +32,7 @@
   # CHECK-LABEL: func @fill_buffer
   # CHECK-SAME: %[[OUT:[0-9a-z]+]]: memref<12x?xf32>
   # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
-  # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : memref<12x?xf32>)
+  # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : memref<12x?xf32>)
   # CHECK-NEXT: return
   @func.FuncOp.from_py_func(
       MemRefType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -59,12 +59,12 @@
   # CHECK: linalg.elemwise_unary
   # CHECK-SAME: cast = #linalg.type_fn
   # CHECK-SAME: fun = #linalg.unary_fn
-  # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+  # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
   unary_result = linalg.elemwise_unary(lhs, outs=[init_result.result])
   # CHECK: linalg.elemwise_binary
   # CHECK-SAME: cast = #linalg.type_fn
   # CHECK-SAME: fun = #linalg.binary_fn
-  # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+  # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
   # CHECK: return
   binary_result = linalg.elemwise_binary(
       lhs,
@@ -144,7 +144,7 @@
   init = tensor.EmptyOp([4, 8], f32)
   # CHECK: linalg.matmul
   # CHECK: ins(%[[LHS]], %[[RHS]]
-  # CHECK: outs(%[[INIT]]
+  # CHECK: inits(%[[INIT]]
   return linalg.matmul(lhs, rhs, outs=init)
 print(module)
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -29,10 +29,10 @@
   %rhs = memref.alloc() : memref<4x8xf32>
   %O0 = memref.alloc() : memref<4x8xf32>
   %O1 = memref.alloc() : memref<4x8xf32>
-  linalg.fill ins(%v1 : f32) outs(%lhs : memref)
-  linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)
+  linalg.fill ins(%v1 : f32) inits(%lhs : memref)
+  linalg.fill ins(%v2 : f32) inits(%rhs : memref<4x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%O0 : memref<4x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%O1 : memref<4x8xf32>)
   call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0)
     : (memref, memref<4x8xf32>, memref<4x8xf32>) -> ()
@@ -60,10 +60,10 @@
   %B = memref.alloc() : memref<16x8xf32>
   %C0 = memref.alloc() : memref<4x8xf32>
   %C1 = memref.alloc() : memref<4x8xf32>
-  linalg.fill ins(%v1 : i8) outs(%A : memref<4x16xi8>)
-  linalg.fill ins(%v2 : f32) outs(%B : memref<16x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%C0 : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%C1 : memref<4x8xf32>)
+  linalg.fill ins(%v1 : i8) inits(%A : memref<4x16xi8>)
+  linalg.fill ins(%v2 : f32) inits(%B : memref<16x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%C0 : memref<4x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%C1 : memref<4x8xf32>)
   call @matmul_signed_on_buffers(%A, %B, %C0)
     : (memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()
@@ -137,9 +137,9 @@
   %input = memref.alloc() : memref<1x4x16x1xf64>
   %filter = memref.alloc() : memref<2x2x1xf64>
   %output = memref.alloc() : memref<1x2x4x1xi32>
-  linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
-  linalg.fill ins(%v2 : f64) outs(%filter : memref<2x2x1xf64>)
-  linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+  linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+  linalg.fill ins(%v2 : f64) inits(%filter : memref<2x2x1xf64>)
+  linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
   call @conv_on_buffers(%input, %filter, %output)
     : (memref<1x4x16x1xf64>, memref<2x2x1xf64>, memref<1x2x4x1xi32>) -> ()
@@ -163,9 +163,9 @@
   %input = memref.alloc() : memref<1x4x16x1xf64>
   %shape = memref.alloc() : memref<2x2xf64>
   %output = memref.alloc() : memref<1x2x4x1xi32>
-  linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
-  linalg.fill ins(%v1 : f64) outs(%shape : memref<2x2xf64>)
-  linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+  linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+  linalg.fill ins(%v1 : f64) inits(%shape : memref<2x2xf64>)
+  linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index