diff --git a/mlir/docs/Dialects/Linalg/OpDSL.md b/mlir/docs/Dialects/Linalg/OpDSL.md
--- a/mlir/docs/Dialects/Linalg/OpDSL.md
+++ b/mlir/docs/Dialects/Linalg/OpDSL.md
@@ -318,8 +318,8 @@
 `fill` with arbitrary ranked output tensors:
 
 ```python
-tensor_2d = linalg.InitTensorOp([4, 8], f32)
-tensor_3d = linalg.InitTensorOp([4, 8, 16], f32)
+tensor_2d = tensor.EmptyOp([4, 8], f32)
+tensor_3d = tensor.EmptyOp([4, 8, 16], f32)
 fill(value, outs=[tensor_2d])
 fill(value, outs=[tensor_3d])
 ```
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -24,110 +24,6 @@
 class Linalg_Op<string mnemonic, list<Trait> traits = []> :
     Op<Linalg_Dialect, mnemonic, traits>;
 
-def Linalg_InitTensorOp : Linalg_Op<"init_tensor",
-    [NoSideEffect,
-     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
-  let summary = "operation to define a tensor of particular shape";
-
-  let description = [{
-    `linalg.init_tensor` is an operation that defines a tensor of a particular
-    shape. The shape could be dynamic or static. The contents of the tensor are
-    unspecified and the only purpose of the op result is to materialize the
-    specified shape in IR and make it available to other transformations.
-
-    Note: This op can be lowered to a `bufferization.alloc_tensor`, at which
-    point it turns into an explicit buffer allocation.
-  }];
-
-  let arguments =
-    (ins Variadic<Index>:$sizes, I64ArrayAttr:$static_sizes);
-
-  let results = (outs AnyTensor:$result);
-
-  let assemblyFormat = [{
-    custom<DynamicIndexList>($sizes, $static_sizes,
-                               "ShapedType::kDynamicSize")
-    attr-dict `:` type($result)
-  }];
-
-  let extraClassDeclaration = [{
-    static StringRef getStaticSizesAttrStrName() {
-      return "static_sizes";
-    }
-
-    RankedTensorType getType() {
-      return getResult().getType().cast<RankedTensorType>(); }
-
-    // Infer the shape of the result tensor given the static shapes
-    // and element type of the result tensor.
-    static Type inferResultType(ArrayRef<int64_t> staticSizes, Type elementType,
-                                Attribute encoding = {});
-
-    // Return true if the size of the tensor is dynamic at `idx`
-    bool isDynamicSize(unsigned idx) {
-      APInt v = *(getStaticSizes().getAsValueRange<IntegerAttr>().begin() + idx);
-      return ShapedType::isDynamic(v.getSExtValue());
-    }
-
-    // Assert that the size of the result tensor is static at `idx`
-    // and return the shape.
-    int64_t getStaticSize(unsigned idx) {
-      assert(!isDynamicSize(idx) && "expected static size");
-      APInt v = *(getStaticSizes().
-          template getAsValueRange<IntegerAttr>().begin() + idx);
-        return v.getSExtValue();
-    }
-
-    // Return the argument position that contains the dynamic size of
-    // the tensor at dimension `idx`. Asserts that the shape is
-    // dynamic at that `idx`.
-    unsigned getIndexOfDynamicSize(unsigned idx) {
-      assert(isDynamicSize(idx) && "expected dynamic size");
-      return std::count_if(
-          getStaticSizes().getValue().begin(),
-          getStaticSizes().getValue().begin() + idx,
-          [&](Attribute attr) {
-            return ShapedType::isDynamic(attr.cast<IntegerAttr>().getInt());
-          });
-    }
-
-    // Return both static and dynamic sizes as a list of `OpFoldResult`.
-    SmallVector<OpFoldResult> getMixedSizes();
-
-    // Return the Value of the dynamic size of the tensor at dimension
-    // `idx`. Asserts that the shape is dynamic at that `idx.
-    Value getDynamicSize(unsigned idx) {
-      return getOperand(getIndexOfDynamicSize(idx));
-    }
-  }];
-
-  let builders = [
-    OpBuilder<(ins "ValueRange":$shape,
-                  "ArrayRef<int64_t>":$staticShape, "Type":$elementType),
-    [{
-      build($_builder, $_state,
-            InitTensorOp::inferResultType(staticShape, elementType),
-            shape, $_builder.getI64ArrayAttr(staticShape));
-    }]>,
-    OpBuilder<(ins "ValueRange":$shape, "Type":$elementType),
-    [{
-      SmallVector<int64_t, 4> staticShape(
-        shape.size(), ShapedType::kDynamicSize);
-      build($_builder, $_state, shape, staticShape, elementType);
-    }]>,
-    OpBuilder<(ins "ArrayRef<int64_t>":$staticShape, "Type":$elementType),
-    [{
-      build($_builder, $_state, ValueRange{}, staticShape, elementType);
-    }]>,
-    OpBuilder<(ins "ArrayRef<OpFoldResult>":$sizes, "Type":$elementType,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
-  ];
-
-  let hasCanonicalizer = 1;
-  let hasCustomAssemblyFormat = 1;
-  let hasVerifier = 1;
-}
-
 def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>,
     Arguments<(ins Variadic<AnyType>:$values)> {
   let summary = "Linalg yield operation";
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -61,8 +61,8 @@
 std::unique_ptr<OperationPass<func::FuncOp>>
 createConvertLinalgToAffineLoopsPass();
 
-/// Create a pass that rewrites init_tensor to alloc_tensor.
-std::unique_ptr<Pass> createLinalgInitTensorToAllocTensorPass();
+/// Create a pass that rewrites tensor.empty to bufferization.alloc_tensor.
+std::unique_ptr<Pass> createEmptyTensorToAllocTensorPass();
 
 /// Create a pass to convert Linalg operations which work on tensors to use
 /// buffers instead.
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -24,14 +24,14 @@
   let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"];
 }
 
-def LinalgInitTensorToAllocTensor : Pass<"linalg-init-tensor-to-alloc-tensor"> {
-  let summary = "Replace all init_tensor ops by alloc_tensor ops.";
+def EmptyTensorToAllocTensor : Pass<"empty-tensor-to-alloc-tensor"> {
+  let summary = "Replace all empty ops by alloc_tensor ops.";
   let description = [{
-    init_tensor ops return a tensor of unspecified contents who's only purpose
+    tensor.empty ops return a tensor of unspecified contents who's only purpose
     is to carry the tensor shape. This pass converts such ops to
     bufferization.alloc_tensor ops, which bufferize to buffer allocations.
   }];
-  let constructor = "mlir::createLinalgInitTensorToAllocTensorPass()";
+  let constructor = "mlir::createEmptyTensorToAllocTensorPass()";
 }
 
 def LinalgFoldUnitExtentDims : Pass<"linalg-fold-unit-extent-dims", ""> {
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -506,7 +506,7 @@
     ```
       %cst = arith.constant 0.000000e+00 : f32
       %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
-      %1 = linalg.init_tensor [4] : tensor<4xf32>
+      %1 = tensor.empty() : tensor<4xf32>
       %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
       %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                             affine_map<(d0, d1) -> (d0)>],
@@ -557,11 +557,11 @@
      #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
      #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
      #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
-     %0 = linalg.init_tensor [16, 32, 64] : tensor<16x32x64xf32>
+     %0 = tensor.empty() : tensor<16x32x64xf32>
      %cst = arith.constant 0.000000e+00 : f32
      %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
         tensor<16x32x64xf32>
-     %2 = linalg.init_tensor [64, 4] : tensor<64x4xi1>
+     %2 = tensor.empty() : tensor<64x4xi1>
 
      %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
        iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
@@ -49,7 +49,7 @@
 ///
 /// ```
 ///    scf.for (%i) {
-///      %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
+///      %packed_init = tensor.empty range(%j) : tensor<?x4x8xf32>
 ///      %packed = scf.for (%k) iter_args(%p : %packed_init) {
 ///        %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
 ///        %0 = tensor.pad %st0 low[0, 0] high[...] {
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1196,7 +1196,7 @@
 using OptimizeCopyFn =
     std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
 
-/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
 /// InsertSliceOp. For now, only constant padding values are supported.
 /// `OptimizeCopyFn` can be used to customize copying step optimization.
 struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
@@ -1407,7 +1407,7 @@
 /// ```
 ///  %cst = arith.constant 0.000000e+00 : f32
 ///  %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
-///  %1 = linalg.init_tensor [4] : tensor<4xf32>
+///  %1 = tensor.empty [4] : tensor<4xf32>
 ///  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
 ///  %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
 ///                                        affine_map<(d0, d1) -> (d0)>],
@@ -1464,11 +1464,11 @@
 ///  #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
 ///  #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 ///  #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
-///  %0 = linalg.init_tensor [16, 32, 64] : tensor<16x32x64xf32>
+///  %0 = tensor.empty [16, 32, 64] : tensor<16x32x64xf32>
 ///  %cst = arith.constant 0.000000e+00 : f32
 ///  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
 ///     tensor<16x32x64xf32>
-///  %2 = linalg.init_tensor [64, 4] : tensor<64x4xi1>
+///  %2 = tensor.empty [64, 4] : tensor<64x4xi1>
 ///
 ///  %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
 ///    iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorBase.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorBase.td
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorBase.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorBase.td
@@ -47,6 +47,7 @@
 
   let hasConstantMaterializer = 1;
   let dependentDialects = [
+    "AffineDialect",
     "arith::ArithDialect",
     "complex::ComplexDialect",
   ];
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -136,6 +136,65 @@
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// EmptyOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_EmptyOp : Tensor_Op<"empty",
+    [NoSideEffect,
+     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
+  let summary = "empty tensor operation";
+
+  let description = [{
+    `tensor.empty` is an operation that defines a tensor of a particular shape.
+    The shape could be dynamic or static. The contents of the tensor are
+    unspecified and the only purpose of the op result is to materialize the
+    specified shape in IR and make it available to other transformations.
+
+    `tensor.empty` is useful in transformations that expect destination style
+    ops. I.e., ops that implement `DestinationStyleOpInterface`. Ops that are
+    not in destination style can be made compatible with such transformations
+    with a `tensor.empty` destination.
+
+    Note: This op can be lowered to a `bufferization.alloc_tensor`, at which
+    point it turns into an explicit buffer allocation.
+  }];
+
+  let arguments = (ins Variadic<Index>:$dynamicSizes);
+
+  let results = (outs AnyRankedTensor:$result);
+
+  let assemblyFormat = "`(`$dynamicSizes`)` attr-dict `:` type($result)";
+
+  let extraClassDeclaration = [{
+    RankedTensorType getType() {
+      return getResult().getType().cast<RankedTensorType>();
+    }
+
+    // Return both static and dynamic sizes as a list of `OpFoldResult`.
+    SmallVector<OpFoldResult> getMixedSizes();
+
+    // Return the Value of the dynamic size of the tensor at dimension `idx`.
+    // Asserts that the shape is dynamic at that `idx`.
+    Value getDynamicSize(unsigned idx);
+  }];
+
+  let builders = [
+    // Build with fully static sizes.
+    OpBuilder<(ins "ArrayRef<int64_t>":$staticShape, "Type":$elementType)>,
+
+    // Build with mixed static/dynamic sizes.
+    OpBuilder<(ins "ArrayRef<int64_t>":$staticShape, "Type":$elementType,
+                   "ValueRange":$dynamicSizes)>,
+
+    // Build with mixed static/dynamic sizes.
+    OpBuilder<(ins "ArrayRef<OpFoldResult>":$sizes, "Type":$elementType)>
+  ];
+
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // ExtractOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/TransformUtils.h b/mlir/include/mlir/Dialect/Tensor/Transforms/TransformUtils.h
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/TransformUtils.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/TransformUtils.h
@@ -62,7 +62,7 @@
 /// We can construct %2 by generating the following, which only uses `%0`:
 ///
 /// ```
-/// %dest = linalg.init_tensor [%size0, %size1] : tensor<?x?xf32>
+/// %dest = tensor.empty(%size0, %size1) : tensor<?x?xf32>
 /// %1 = tensor.dim %0, %c1 : tensor<3x?x?x11x?xf32>
 /// %2 = tensor.dim %0, %c2 : tensor<3x?x?x11x?xf32>
 /// %3 = tensor.dim %0, %c4 : tensor<3x?x?x11x?xf32>
diff --git a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
--- a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
@@ -400,7 +400,7 @@
 /// ```
 /// This class helps build the below IR to replace %2:
 /// ```
-/// %dest = linalg.init_tensor() : tensor<10x10xf32>
+/// %dest = tensor.empty() : tensor<10x10xf32>
 /// %2 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%arg0) -> tensor<10x10xf32> {
 ///    %linear_index = affine.apply affine_map<(d0)[]->(d0*2 + 11)>(%iv)
 ///    %3:3 = arith.delinearize_index %iv into (3, 7, 11)
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -556,7 +556,7 @@
     bodyArgTypes.emplace_back(getElementTypeOrSelf(in.getType()));
 
   SmallVector<Type> opResultTypes;
-  SmallVector<Value> initTensors;
+  SmallVector<Value> emptyTensors;
 
   SmallVector<Value> dynDims;
   dynDims.resize(results.front().getType().cast<ShapedType>().getRank());
@@ -573,13 +573,13 @@
 
   for (auto result : results) {
     auto resultTy = result.getType().template cast<ShapedType>();
-    initTensors.push_back(rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, resultTy.getShape(), resultTy.getElementType()));
+    emptyTensors.push_back(rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultTy.getElementType(), filteredDims));
     opResultTypes.push_back(result.getType());
   }
 
   auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range(
-      initTensors, [](Value v) { return getElementTypeOrSelf(v); }));
+      emptyTensors, [](Value v) { return getElementTypeOrSelf(v); }));
 
   SmallVector<Value, 2> operands;
   SmallVector<AffineMap, 2> indexingMaps;
@@ -623,7 +623,7 @@
 
   bool didEncounterError = false;
   auto linalgOp = rewriter.create<linalg::GenericOp>(
-      loc, opResultTypes, operands, initTensors, indexingMaps,
+      loc, opResultTypes, operands, emptyTensors, indexingMaps,
       getNParallelLoopsAttrs(rank),
       [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) {
         Value opResult = createLinalgBodyCalculationForElementwiseOp(
@@ -771,10 +771,11 @@
   Type reduceTy = RankedTensorType::get(reduceShape, resultTy.getElementType());
 
   // First fill the output buffer with the init value.
-  auto initTensor = rewriter
-                        .create<linalg::InitTensorOp>(loc, dynDims, reduceShape,
-                                                      resultTy.getElementType())
-                        .getResult();
+  auto emptyTensor =
+      rewriter
+          .create<tensor::EmptyOp>(loc, reduceShape, resultTy.getElementType(),
+                                   dynDims)
+          .getResult();
 
   auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter);
   if (!fillValueAttr)
@@ -784,7 +785,7 @@
   auto fillValue = rewriter.create<arith::ConstantOp>(loc, fillValueAttr);
   auto filledTensor = rewriter
                           .create<linalg::FillOp>(loc, ValueRange{fillValue},
-                                                  ValueRange{initTensor})
+                                                  ValueRange{emptyTensor})
                           .result();
 
   SmallVector<AffineExpr, 2> srcExprs;
@@ -1091,8 +1092,8 @@
 
     SmallVector<Value> filteredDims = condenseValues(dynDims);
 
-    auto initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, resultTy.getShape(), resultTy.getElementType());
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultTy.getElementType(), filteredDims);
 
     SmallVector<AffineMap, 2> affineMaps = {
         AffineMap::get(resultTy.getRank(), /*symbolCount=*/0, inputExprs,
@@ -1100,7 +1101,7 @@
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};
 
     rewriter.replaceOpWithNewOp<linalg::GenericOp>(
-        op, resultTy, op.getInput1(), ValueRange{initTensor}, affineMaps,
+        op, resultTy, op.getInput1(), ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
           nestedBuilder.create<linalg::YieldOp>(loc, *args.begin());
@@ -1206,12 +1207,12 @@
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank));
 
     // Construct the indexing maps needed for linalg.generic ops.
-    Value initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, ArrayRef<Value>({dynDims}), outputTy.getShape(),
-        outputTy.getElementType());
+    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, outputTy.getShape(), outputTy.getElementType(),
+        ArrayRef<Value>({dynDims}));
 
     auto linalgOp = rewriter.create<linalg::GenericOp>(
-        loc, outputTy, genericInputs, ValueRange{initTensor}, indexingMaps,
+        loc, outputTy, genericInputs, ValueRange{emptyTensor}, indexingMaps,
         getNParallelLoopsAttrs(rank),
         [&](OpBuilder &nestedBuilder, Location nestedLoc,
             ValueRange blockArgs) {
@@ -1328,14 +1329,14 @@
     if (op.getMode() != "NEAREST_NEIGHBOR" && op.getMode() != "BILINEAR")
       return failure();
 
-    auto initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, dynamicDims, resultTy.getShape(), resultElementTy);
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultElementTy, dynamicDims);
 
     SmallVector<AffineMap, 2> affineMaps = {
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};
 
     auto genericOp = rewriter.create<linalg::GenericOp>(
-        loc, resultTy, ValueRange({}), ValueRange{initTensor}, affineMaps,
+        loc, resultTy, ValueRange({}), ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()));
     rewriter.replaceOp(op, genericOp.getResult(0));
 
@@ -1634,15 +1635,15 @@
     }
     sizes[axis] = resultDimSize;
 
-    Value init = rewriter.create<linalg::InitTensorOp>(
-        loc, dynDims, resultType.getShape(), resultType.getElementType());
+    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultType.getShape(), resultType.getElementType(), dynDims);
 
     Value zeroVal = rewriter.createOrFold<arith::ConstantOp>(
         loc, rewriter.getZeroAttr(resultType.getElementType()));
-    Value result =
-        rewriter
-            .create<linalg::FillOp>(loc, ValueRange{zeroVal}, ValueRange{init})
-            .result();
+    Value result = rewriter
+                       .create<linalg::FillOp>(loc, ValueRange{zeroVal},
+                                               ValueRange{emptyTensor})
+                       .result();
 
     auto toOpFoldResult = [](Value v) -> OpFoldResult {
       auto op = v.getDefiningOp<arith::ConstantIndexOp>();
@@ -1687,16 +1688,16 @@
     Value axisDimSize = rewriter.create<tensor::DimOp>(loc, input, axis);
 
     // First fill the output buffer with the init value.
-    auto initTensor = rewriter
-                          .create<linalg::InitTensorOp>(
-                              loc, ArrayRef<Value>({dynDims}),
-                              inputTy.getShape(), inputTy.getElementType())
-                          .getResult();
+    auto emptyTensor = rewriter
+                           .create<tensor::EmptyOp>(loc, inputTy.getShape(),
+                                                    inputTy.getElementType(),
+                                                    ArrayRef<Value>({dynDims}))
+                           .getResult();
     SmallVector<AffineMap, 2> affineMaps = {
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};
 
     rewriter.replaceOpWithNewOp<linalg::GenericOp>(
-        op, resultTy, ArrayRef<Value>({}), ValueRange{initTensor}, affineMaps,
+        op, resultTy, ArrayRef<Value>({}), ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
           llvm::SmallVector<Value> indices;
@@ -1758,8 +1759,8 @@
       }
     }
 
-    auto initTensor = rewriter.create<linalg::InitTensorOp>(
-        op.getLoc(), dynDims, genericShape, elementTy);
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+        op.getLoc(), genericShape, elementTy, dynDims);
 
     // We needs to map the input shape to the non-broadcasted dimensions.
     SmallVector<AffineExpr, 4> dimExprs;
@@ -1776,7 +1777,7 @@
 
     auto genericOp = rewriter.create<linalg::GenericOp>(
         loc, RankedTensorType::get(genericShape, elementTy), input,
-        ValueRange{initTensor}, affineMaps,
+        ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(genericShape.size()),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
           nestedBuilder.create<linalg::YieldOp>(op.getLoc(), *args.begin());
@@ -1906,24 +1907,23 @@
     }
 
     // First fill the output buffer for the index.
-    auto initTensorIdx =
-        rewriter
-            .create<linalg::InitTensorOp>(loc, dynDims, resultTy.getShape(),
-                                          outElementTy)
-            .getResult();
+    auto emptyTensorIdx = rewriter
+                              .create<tensor::EmptyOp>(loc, resultTy.getShape(),
+                                                       outElementTy, dynDims)
+                              .getResult();
     auto fillValueIdx = rewriter.create<arith::ConstantOp>(
         loc, rewriter.getIntegerAttr(outElementTy, 0));
     auto filledTensorIdx =
         rewriter
             .create<linalg::FillOp>(loc, ValueRange{fillValueIdx},
-                                    ValueRange{initTensorIdx})
+                                    ValueRange{emptyTensorIdx})
             .result();
 
     // Second fill the output buffer for the running max.
-    auto initTensorMax = rewriter
-                             .create<linalg::InitTensorOp>(
-                                 loc, dynDims, resultTy.getShape(), inElementTy)
-                             .getResult();
+    auto emptyTensorMax = rewriter
+                              .create<tensor::EmptyOp>(loc, resultTy.getShape(),
+                                                       inElementTy, dynDims)
+                              .getResult();
     auto fillValueMaxAttr =
         createInitialValueForReduceOp(argmaxOp, inElementTy, rewriter);
 
@@ -1936,7 +1936,7 @@
     auto filledTensorMax =
         rewriter
             .create<linalg::FillOp>(loc, ValueRange{fillValueMax},
-                                    ValueRange{initTensorMax})
+                                    ValueRange{emptyTensorMax})
             .result();
 
     // We need to reduce along the arg-max axis, with parallel operations along
@@ -2018,10 +2018,10 @@
 
     auto loc = op.getLoc();
 
-    auto initTensor =
+    auto emptyTensor =
         rewriter
-            .create<linalg::InitTensorOp>(loc, dynamicDims, resultTy.getShape(),
-                                          resultElementTy)
+            .create<tensor::EmptyOp>(loc, resultTy.getShape(), resultElementTy,
+                                     dynamicDims)
             .getResult();
 
     SmallVector<AffineMap, 2> affineMaps = {
@@ -2033,7 +2033,7 @@
 
     auto genericOp = rewriter.create<linalg::GenericOp>(
         loc, ArrayRef<Type>({resultTy}), ValueRange{indices},
-        ValueRange{initTensor}, affineMaps,
+        ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()),
         [&](OpBuilder &b, Location loc, ValueRange args) {
           auto indexValue = args[0];
@@ -2078,18 +2078,17 @@
       }
     }
 
-    auto initTensor =
-        rewriter
-            .create<linalg::InitTensorOp>(loc, dynDims, resultTy.getShape(),
-                                          resultElementTy)
-            .getResult();
+    auto emptyTensor = rewriter
+                           .create<tensor::EmptyOp>(loc, resultTy.getShape(),
+                                                    resultElementTy, dynDims)
+                           .getResult();
 
     SmallVector<AffineMap, 2> affineMaps = {
         rewriter.getMultiDimIdentityMap(resultTy.getRank()),
         rewriter.getMultiDimIdentityMap(resultTy.getRank())};
 
     auto genericOp = rewriter.create<linalg::GenericOp>(
-        loc, resultTy, ValueRange({input}), ValueRange{initTensor}, affineMaps,
+        loc, resultTy, ValueRange({input}), ValueRange{emptyTensor}, affineMaps,
         getNParallelLoopsAttrs(resultTy.getRank()));
     rewriter.replaceOp(op, genericOp.getResult(0));
 
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -241,12 +241,12 @@
                                                 weightPermValue);
 
     Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy);
-    Value initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, resultTy.getShape(), resultETy);
+    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultETy, filteredDims);
     Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
     Value zeroTensor = rewriter
                            .create<linalg::FillOp>(loc, ValueRange{zero},
-                                                   ValueRange{initTensor})
+                                                   ValueRange{emptyTensor})
                            .result();
 
     // Extract the attributes for convolution.
@@ -268,8 +268,8 @@
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
 
-    Value biasInitTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, resultTy.getShape(), resultETy);
+    Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultETy, filteredDims);
 
     if (isQuantized) {
       auto quantizationInfo =
@@ -289,7 +289,7 @@
       Value result =
           rewriter
               .create<linalg::GenericOp>(
-                  loc, resultTy, ValueRange({bias, conv}), biasInitTensor,
+                  loc, resultTy, ValueRange({bias, conv}), biasEmptyTensor,
                   indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()),
                   [&](OpBuilder &nestedBuilder, Location nestedLoc,
                       ValueRange args) {
@@ -311,7 +311,7 @@
     Value result =
         rewriter
             .create<linalg::GenericOp>(
-                loc, resultTy, ValueRange({bias, conv}), biasInitTensor,
+                loc, resultTy, ValueRange({bias, conv}), biasEmptyTensor,
                 indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()),
                 [&](OpBuilder &nestedBuilder, Location nestedLoc,
                     ValueRange args) {
@@ -426,16 +426,16 @@
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
 
     Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy);
-    Value initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, linalgConvTy.getShape(), resultETy);
+    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, linalgConvTy.getShape(), resultETy, filteredDims);
     Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
     Value zeroTensor = rewriter
                            .create<linalg::FillOp>(loc, ValueRange{zero},
-                                                   ValueRange{initTensor})
+                                                   ValueRange{emptyTensor})
                            .result();
 
-    Value biasInitTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, resultTy.getShape(), resultETy);
+    Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultETy, filteredDims);
     if (!isQuantized) {
       Value conv = rewriter
                        .create<linalg::DepthwiseConv2DNhwcHwcmOp>(
@@ -452,7 +452,7 @@
           rewriter
               .create<linalg::GenericOp>(
                   loc, resultTy, ValueRange({bias, convReshape}),
-                  biasInitTensor, indexingMaps,
+                  biasEmptyTensor, indexingMaps,
                   getNParallelLoopsAttrs(resultRank),
                   [&](OpBuilder &nestedBuilder, Location nestedLoc,
                       ValueRange args) {
@@ -479,7 +479,7 @@
           rewriter
               .create<linalg::GenericOp>(
                   loc, resultTy, ValueRange({bias, convReshape}),
-                  biasInitTensor, indexingMaps,
+                  biasEmptyTensor, indexingMaps,
                   getNParallelLoopsAttrs(resultRank),
                   [&](OpBuilder &nestedBuilder, Location nestedLoc,
                       ValueRange args) {
@@ -527,11 +527,11 @@
 
     auto zeroAttr = rewriter.getZeroAttr(outputElementTy);
     Value zero = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
-    auto initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, outputTy.getShape(), outputTy.getElementType(), filteredDims);
     Value zeroTensor = rewriter
                            .create<linalg::FillOp>(loc, ValueRange{zero},
-                                                   ValueRange{initTensor})
+                                                   ValueRange{emptyTensor})
                            .result();
     if (!op.getQuantizationInfo()) {
       rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
@@ -597,15 +597,15 @@
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
 
-    auto initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, outputTy.getShape(), outputTy.getElementType(), filteredDims);
 
     // When quantized, the input elemeny type is not the same as the output
     Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy);
     Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
     Value zeroTensor = rewriter
                            .create<linalg::FillOp>(loc, ValueRange{zero},
-                                                   ValueRange{initTensor})
+                                                   ValueRange{emptyTensor})
                            .result();
 
     SmallVector<int64_t> permutation{1, 0};
@@ -621,10 +621,10 @@
     Value transposedWeight = rewriter.create<tosa::TransposeOp>(
         loc, newWeightTy, weight, permutationValue);
 
-    auto biasInitTensor =
+    auto biasEmptyTensor =
         rewriter
-            .create<linalg::InitTensorOp>(loc, filteredDims,
-                                          outputTy.getShape(), outputETy)
+            .create<tensor::EmptyOp>(loc, outputTy.getShape(), outputETy,
+                                     filteredDims)
             ->getResults();
 
     if (!op.getQuantizationInfo()) {
@@ -637,7 +637,7 @@
       Value result =
           rewriter
               .create<linalg::GenericOp>(
-                  loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+                  loc, outputTy, ValueRange({bias, matmul}), biasEmptyTensor,
                   indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
                   [&](OpBuilder &nestedBuilder, Location nestedLoc,
                       ValueRange args) {
@@ -665,7 +665,7 @@
     Value result =
         rewriter
             .create<linalg::GenericOp>(
-                loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+                loc, outputTy, ValueRange({bias, matmul}), biasEmptyTensor,
                 indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
                 [&](OpBuilder &nestedBuilder, Location nestedLoc,
                     ValueRange args) {
@@ -732,21 +732,21 @@
     Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
 
     // Create the linalg op that performs pooling.
-    Value initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, dynamicDims, resultTy.getShape(), resultTy.getElementType());
+    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultTy.getElementType(), dynamicDims);
 
-    Value filledInitTensor =
+    Value filledEmptyTensor =
         rewriter
             .create<linalg::FillOp>(loc, ValueRange{initialValue},
-                                    ValueRange{initTensor})
+                                    ValueRange{emptyTensor})
             .result();
 
     Value fakeWindowDims =
-        rewriter.create<linalg::InitTensorOp>(loc, kernel, resultETy);
+        rewriter.create<tensor::EmptyOp>(loc, kernel, resultETy);
 
     rewriter.replaceOpWithNewOp<linalg::PoolingNhwcMaxOp>(
         op, ArrayRef<Type>{resultTy}, ValueRange{paddedInput, fakeWindowDims},
-        filledInitTensor, strideAttr, dilationAttr);
+        filledEmptyTensor, strideAttr, dilationAttr);
     return success();
   }
 };
@@ -794,24 +794,24 @@
     Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
 
     // Create the linalg op that performs pooling.
-    Value poolInitTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, dynamicDims, accTy.getShape(), accETy);
+    Value poolEmptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, accTy.getShape(), accETy, dynamicDims);
 
-    Value filledInitTensor =
+    Value filledEmptyTensor =
         rewriter
             .create<linalg::FillOp>(loc, ValueRange{initialValue},
-                                    ValueRange{poolInitTensor})
+                                    ValueRange{poolEmptyTensor})
             .result();
 
     Value fakeWindowDims =
-        rewriter.create<linalg::InitTensorOp>(loc, kernel, accETy);
+        rewriter.create<tensor::EmptyOp>(loc, kernel, accETy);
 
     // Sum across the pooled region.
     Value poolingOp = rewriter
                           .create<linalg::PoolingNhwcSumOp>(
                               loc, ArrayRef<Type>{accTy},
                               ValueRange{paddedInput, fakeWindowDims},
-                              filledInitTensor, strideAttr, dilationAttr)
+                              filledEmptyTensor, strideAttr, dilationAttr)
                           .getResult(0);
 
     // Normalize the summed value by the number of elements grouped in each
@@ -819,12 +819,12 @@
     auto poolingOpTy = poolingOp.getType().cast<ShapedType>();
     auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank());
 
-    Value genericInitTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, dynamicDims, resultTy.getShape(), resultETy);
+    Value genericEmptyTensor = rewriter.create<tensor::EmptyOp>(
+        loc, resultTy.getShape(), resultETy, dynamicDims);
 
     auto genericOp = rewriter.create<linalg::GenericOp>(
         loc, ArrayRef<Type>({resultTy}), ValueRange{poolingOp},
-        ValueRange{genericInitTensor},
+        ValueRange{genericEmptyTensor},
         ArrayRef<AffineMap>({affineMap, affineMap}),
         getNParallelLoopsAttrs(resultTy.getRank()),
         [&](OpBuilder &b, Location loc, ValueRange args) {
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -545,11 +545,11 @@
     auto oldResultType = padOp.getResultType();
     SmallVector<int64_t, 4> staticShape(oldResultType.getRank(),
                                         ShapedType::kDynamicSize);
-    auto newInitOp = rewriter.create<InitTensorOp>(
-        padOp.getLoc(), reifiedShape.front(), staticShape,
-        oldResultType.getElementType());
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+        padOp.getLoc(), staticShape, oldResultType.getElementType(),
+        reifiedShape.front());
     auto newFillOp = rewriter.create<FillOp>(
-        fillOp.getLoc(), ValueRange{padValue}, ValueRange{newInitOp});
+        fillOp.getLoc(), ValueRange{padValue}, ValueRange{emptyTensor});
     rewriter.replaceOpWithNewOp<tensor::CastOp>(padOp, oldResultType,
                                                 newFillOp.result());
 
@@ -1386,285 +1386,6 @@
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// InitTensorOp
-//===----------------------------------------------------------------------===//
-
-void InitTensorOp::build(OpBuilder &b, OperationState &result,
-                         ArrayRef<OpFoldResult> sizes, Type elementType,
-                         ArrayRef<NamedAttribute> attrs) {
-  SmallVector<Value, 4> dynamicSizes;
-  SmallVector<int64_t, 4> staticSizes;
-  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
-                             ShapedType::kDynamicSize);
-  auto resultType = RankedTensorType ::get(staticSizes, elementType);
-  build(b, result, resultType, dynamicSizes, b.getI64ArrayAttr(staticSizes));
-  result.addAttributes(attrs);
-}
-
-LogicalResult InitTensorOp::verify() {
-  RankedTensorType resultType = getType();
-  SmallVector<int64_t, 4> staticSizes = llvm::to_vector<4>(llvm::map_range(
-      getStaticSizes().cast<ArrayAttr>(),
-      [](Attribute a) -> int64_t { return a.cast<IntegerAttr>().getInt(); }));
-
-  if (failed(verifyListOfOperandsOrIntegers(
-          *this, "sizes", resultType.getRank(), getStaticSizes(), getSizes(),
-          ShapedType::isDynamic)))
-    return failure();
-
-  if (getStaticSizes().size() != static_cast<unsigned>(resultType.getRank()))
-    return emitError("expected ") << resultType.getRank() << " sizes values";
-
-  Type expectedType = InitTensorOp::inferResultType(
-      staticSizes, resultType.getElementType(), resultType.getEncoding());
-  if (resultType != expectedType) {
-    return emitError("specified type ")
-           << resultType << " does not match the inferred type "
-           << expectedType;
-  }
-  return success();
-}
-
-Type InitTensorOp::inferResultType(ArrayRef<int64_t> staticSizes,
-                                   Type elementType, Attribute encoding) {
-  return RankedTensorType::get(staticSizes, elementType, encoding);
-}
-
-SmallVector<OpFoldResult> InitTensorOp::getMixedSizes() {
-  SmallVector<OpFoldResult> mixedSizes;
-  mixedSizes.reserve(getType().getRank());
-  unsigned dynamicValIndex = 0;
-  for (Attribute attr : getStaticSizes()) {
-    auto intAttr = attr.cast<IntegerAttr>();
-    if (!ShapedType::isDynamic(intAttr.getInt())) {
-      mixedSizes.push_back(intAttr);
-      continue;
-    }
-    mixedSizes.push_back(getSizes()[dynamicValIndex++]);
-  }
-  return mixedSizes;
-}
-
-namespace {
-/// Change the type of the result of a `linalg.init_tensor` by making the result
-/// type statically sized along dimension that in the original operation where
-/// defined as dynamic, but the size was defined using a `constant` op. For
-/// example
-///
-///  %c5 = arith.constant 5: index
-///  %0 = linalg.init_tensor [%arg0, %c5] : tensor<?x?xf32>
-///
-///  to
-///
-///  %0 = linalg.init_tensor [%arg0, 5] : tensor<?x5xf32>
-struct ReplaceStaticShapeDims : OpRewritePattern<InitTensorOp> {
-  using OpRewritePattern<InitTensorOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(InitTensorOp op,
-                                PatternRewriter &rewriter) const override {
-    SmallVector<Value, 4> dynamicSizes;
-    SmallVector<int64_t, 4> staticSizes;
-    for (unsigned i = 0, e = op.getType().getRank(); i != e; ++i) {
-      // If the size is already static, nothing to do.
-      if (!op.isDynamicSize(i)) {
-        staticSizes.push_back(op.getStaticSize(i));
-        continue;
-      }
-
-      // If the size is dynamic but defined using a `constant` op, get the
-      // constant value to find the static size to use.
-      unsigned operandNum = op.getIndexOfDynamicSize(i);
-      Value sizeOperand = op.getOperand(operandNum);
-      if (auto constantIndexOp =
-              sizeOperand.getDefiningOp<arith::ConstantIndexOp>()) {
-        staticSizes.push_back(constantIndexOp.value());
-        continue;
-      }
-
-      // Fallback case. Keep the size dynamic.
-      dynamicSizes.push_back(sizeOperand);
-      staticSizes.push_back(ShapedType::kDynamicSize);
-    }
-    RankedTensorType newType =
-        RankedTensorType::get(staticSizes, op.getType().getElementType());
-    if (newType == op.getType())
-      return failure();
-    auto newOp =
-        rewriter.create<InitTensorOp>(op.getLoc(), newType, dynamicSizes,
-                                      rewriter.getI64ArrayAttr(staticSizes));
-    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, op.getType(), newOp);
-    return success();
-  }
-};
-} // namespace
-
-namespace {
-/// Since `init_tensor` operation creates a tensor needed only for its shape, a
-/// slice of this is also needed only for its shape. The result can be
-/// replaced by a new init_tensor operation of the same size as the extract
-/// slice op.
-struct FoldInitTensorWithExtractSliceOp
-    : public OpRewritePattern<tensor::ExtractSliceOp> {
-  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
-                                PatternRewriter &rewriter) const override {
-    if (!sliceOp.getSource().getDefiningOp<linalg::InitTensorOp>())
-      return failure();
-    // ExtractSliceOp may be rank-reducing; its dynamic sizes must be preserved
-    // as well as its result type.
-    rewriter.replaceOpWithNewOp<linalg::InitTensorOp>(
-        sliceOp, sliceOp.getSizes(),
-        sliceOp.getResult().getType().cast<RankedTensorType>().getShape(),
-        sliceOp.getSourceType().getElementType());
-    return success();
-  }
-};
-
-template <typename TensorReshapeOp>
-struct FoldInitTensorWithTensorReshapeOp
-    : public OpRewritePattern<TensorReshapeOp> {
-  using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
-                                PatternRewriter &rewriter) const override {
-    if (!reshapeOp.getSrc().template getDefiningOp<InitTensorOp>())
-      return failure();
-    Location loc = reshapeOp.getLoc();
-    ReifiedRankedShapedTypeDims resultShapes;
-    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
-        cast<ReifyRankedShapedTypeOpInterface>(reshapeOp.getOperation());
-    if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
-                                                          resultShapes)) ||
-        !llvm::hasSingleElement(resultShapes))
-      return failure();
-    Value initTensor = rewriter.create<InitTensorOp>(
-        loc, getAsOpFoldResult(resultShapes[0]),
-        reshapeOp.getResultType().getElementType());
-    if (initTensor.getType() != reshapeOp.getResultType()) {
-      rewriter.replaceOpWithNewOp<tensor::CastOp>(
-          reshapeOp, reshapeOp.getResultType(), initTensor);
-    } else {
-      rewriter.replaceOp(reshapeOp, initTensor);
-    }
-    return success();
-  }
-};
-
-struct FoldInitTensorWithDimOp : public OpRewritePattern<tensor::DimOp> {
-  using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
-                                PatternRewriter &rewriter) const override {
-    Optional<int64_t> maybeConstantIndex = dimOp.getConstantIndex();
-    auto initTensorOp = dimOp.getSource().getDefiningOp<linalg::InitTensorOp>();
-    if (!initTensorOp || !maybeConstantIndex)
-      return failure();
-    if (!initTensorOp.isDynamicSize(*maybeConstantIndex))
-      return failure();
-    rewriter.replaceOp(dimOp, initTensorOp.getDynamicSize(*maybeConstantIndex));
-    return success();
-  }
-};
-
-/// Canonicalize
-///
-/// ```mlir
-///   %0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
-///   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x?xf32>
-/// ```
-///
-/// into
-///
-/// ```mlir
-///   %0 = linalg.init_tensor [4, %d1] : tensor<4x?xf32>
-/// ```
-///
-/// This assumes the input program is correct in terms of its shape. So it
-/// is safe to assume that `%d0` is in fact 4. If that was not the case, the
-/// input program is wrong to begin with, so its undefined behavior anyway (i.e.
-/// this optimization can still triggering without violating program semantics).
-struct FoldInitTensorWithTensorCastOp
-    : public OpRewritePattern<tensor::CastOp> {
-  using OpRewritePattern<tensor::CastOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(tensor::CastOp castOp,
-                                PatternRewriter &rewriter) const override {
-    if (!canFoldIntoProducerOp(castOp))
-      return failure();
-    auto producer = castOp.getSource().getDefiningOp<InitTensorOp>();
-    if (!producer)
-      return failure();
-
-    auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>();
-    ArrayRef<int64_t> resultShape = resultType.getShape();
-    SmallVector<OpFoldResult> currMixedSizes = producer.getMixedSizes();
-    SmallVector<OpFoldResult> newMixedSizes;
-    newMixedSizes.reserve(currMixedSizes.size());
-    assert(resultShape.size() == currMixedSizes.size() &&
-           "mismatch in result shape and sizes of init_tensor op");
-    for (auto it : llvm::zip(resultShape, currMixedSizes)) {
-      int64_t newDim = std::get<0>(it);
-      OpFoldResult currDim = std::get<1>(it);
-      // Case 1: The init tensor dim is static. Check that the tensor cast
-      // result dim matches.
-      if (auto attr = currDim.dyn_cast<Attribute>()) {
-        if (ShapedType::isDynamic(newDim) ||
-            newDim != attr.cast<IntegerAttr>().getInt()) {
-          // Something is off, the cast result shape cannot be more dynamic than
-          // the init tensor result shape (enforced by `canFoldIntoProducer`).
-          // Abort for now.
-          return rewriter.notifyMatchFailure(
-              producer, "mismatch in static value of shape of init "
-                        "tensor result and cast result");
-        }
-        newMixedSizes.push_back(attr);
-        continue;
-      }
-
-      // Case 2 : The tensor cast shape is static, but init tensor result shape
-      // is dynamic.
-      if (!ShapedType::isDynamic(newDim)) {
-        newMixedSizes.push_back(rewriter.getIndexAttr(newDim));
-        continue;
-      }
-
-      // Case 3 : The tensor cast shape is dynamic and init tensor result shape
-      // is dynamic. Use the dynamic value from the init tensor op.
-      newMixedSizes.push_back(currDim);
-    }
-
-    rewriter.replaceOpWithNewOp<InitTensorOp>(castOp, newMixedSizes,
-                                              resultType.getElementType());
-    return success();
-  }
-};
-
-} // namespace
-
-void InitTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                               MLIRContext *context) {
-  results.add<FoldInitTensorWithTensorCastOp, FoldInitTensorWithDimOp,
-              FoldInitTensorWithExtractSliceOp,
-              FoldInitTensorWithTensorReshapeOp<tensor::ExpandShapeOp>,
-              FoldInitTensorWithTensorReshapeOp<tensor::CollapseShapeOp>,
-              ReplaceStaticShapeDims>(context);
-}
-
-LogicalResult InitTensorOp::reifyResultShapes(
-    OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
-  auto shapes = llvm::to_vector<4>(llvm::map_range(
-      llvm::seq<int64_t>(0, getType().getRank()), [&](int64_t dim) -> Value {
-        if (isDynamicSize(dim))
-          return getDynamicSize(dim);
-        return builder.create<arith::ConstantIndexOp>(getLoc(),
-                                                      getStaticSize(dim));
-      }));
-  reifiedReturnShapes.emplace_back(std::move(shapes));
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // YieldOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp
@@ -46,7 +46,7 @@
 /// gets split into
 ///
 /// ```mlir
-/// %init = linalg.init_tensor ...
+/// %init = tensor.empty ...
 /// %op0:3 = linalg.generic ... ins(%arg0, %arg1, %arg2 : ...)
 ///      outs(%init0, %init1, %init : ...)
 ///    ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ..., %b5: ...):
@@ -65,7 +65,7 @@
 /// After canonicalization this is expected to be
 ///
 /// ```mlir
-/// %init = linalg.init_tensor ...
+/// %init = tensor.empty ...
 /// %op0 = linalg.generic ... ins(%arg0, %arg1, : ...)
 ///      outs(%init : ...)
 ///    ^bb0(%b0: ... , %b1: ... , %b2: ...):
@@ -186,10 +186,10 @@
 
     // Fall back path, use an `init_tensor` and identity indexing map.
     AffineMap indexingMap = rewriter.getMultiDimIdentityMap(domain.size());
-    Value initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, domain, scalarOpResult.getType());
-    newInitValues.push_back(initTensor);
-    newResultTypes.push_back(initTensor.getType());
+    Value emptyTensor =
+        rewriter.create<tensor::EmptyOp>(loc, domain, scalarOpResult.getType());
+    newInitValues.push_back(emptyTensor);
+    newResultTypes.push_back(emptyTensor.getType());
     peeledGenericOpIndexingMaps.push_back(indexingMap);
   }
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -188,7 +188,7 @@
     /// For the following snippet:
     /// ...
     /// ^bb1(%6: tensor<i32>, %9: tensor<i32>):
-    ///   %7 = linalg.init_tensor [] : tensor<i32>
+    ///   %7 = tensor.empty() : tensor<i32>
     ///   %8 = linalg.generic #attrs
     ///     ins(%6, %6 : tensor<i32>, tensor<i32>)
     ///     outs(%7 : tensor<i32>) {
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -540,8 +540,8 @@
                RankReducedInsertSliceOp<tensor::ParallelInsertSliceOp>>(
       context);
   linalg::FillOp::getCanonicalizationPatterns(patterns, context);
-  linalg::InitTensorOp::getCanonicalizationPatterns(patterns, context);
   tensor::CollapseShapeOp::getCanonicalizationPatterns(patterns, context);
+  tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);
   tensor::ExpandShapeOp::getCanonicalizationPatterns(patterns, context);
 }
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -1004,7 +1004,7 @@
 // ```mlir
 // #map = affine_map<(d0, d1) -> (d0, d1)>
 // %1 = tensor.expand_shape %0 [[0, 1]] : tensor<?xf32> into tensor<?x4xf32>
-// %2 = linalg.init_tensor [..] : tensor<?x4xf32>
+// %2 = tensor.empty [..] : tensor<?x4xf32>
 // %3 = linalg.generic {
 //     indexing_maps = [#map, #map],
 //     iterator_types = ["parallel" ,"parallel"]}
@@ -1015,7 +1015,7 @@
 //
 // ```mlir
 // #map = affine_map<(d0) -> (d0)>
-// %2 = linalg.init_tensor [..] : tensor<?xf32>
+// %2 = tensor.empty [..] : tensor<?xf32>
 // %3 = linalg.generic {
 //     indexing_maps = [#map, #map],
 //     iterator_types = ["parallel"]}
@@ -1029,7 +1029,7 @@
 // #map0 = affine_map<(d0, d1) -> (d0, d1)>
 // #map1 = affine_map<(d0, d1) -> (d1, d0)>
 // %1 = tensor.expand_shape %0 [[0, 1]] : tensor<?xf32> into tensor<?x4xf32>
-// %2 = linalg.init_tensor [..] : tensor<4x?xf32>
+// %2 = tensor.empty [..] : tensor<4x?xf32>
 // %2 = linalg.generic {
 //     indexing_maps = [#map0, #map1],
 //     iterator_types = ["parallel" ,"parallel"]}
@@ -1645,8 +1645,8 @@
 //===---------------------------------------------------------------------===//
 
 namespace {
-/// Forces `outs` operands of linalg operations to use `linalg.init_tensor` if
-/// the value of the `outs` operand is not used within the op.  This is only
+/// Forces `outs` operands of linalg operations to use `tensor.empty` if the
+/// value of the `outs` operand is not used within the op.  This is only
 /// implemented for `linalg.generic` operations for now, but should hold for all
 /// linalg structured ops.
 struct RemoveOutsDependency : public OpRewritePattern<GenericOp> {
@@ -1668,8 +1668,8 @@
         if (sparse_tensor::getSparseTensorEncoding(operandVal.getType()))
           continue;
 
-        // If outs is already an `init_tensor` operation, nothing to do.
-        auto definingOp = operandVal.getDefiningOp<InitTensorOp>();
+        // If outs is already an `empty` operation, nothing to do.
+        auto definingOp = operandVal.getDefiningOp<tensor::EmptyOp>();
         if (definingOp)
           continue;
         modifiedOutput = true;
@@ -1680,10 +1680,10 @@
           dynamicDims.push_back(rewriter.createOrFold<tensor::DimOp>(
               loc, operandVal, dim.index()));
         }
-        Value initTensor = rewriter.create<InitTensorOp>(
-            loc, dynamicDims, operandType.getShape(),
-            operandType.getElementType());
-        op->setOperand(opOperand->getOperandNumber(), initTensor);
+        Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+            loc, operandType.getShape(), operandType.getElementType(),
+            dynamicDims);
+        op->setOperand(opOperand->getOperandNumber(), emptyTensor);
       }
     }
     if (!modifiedOutput) {
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp
@@ -37,7 +37,7 @@
 ///   1. `v.getType() == t`
 ///   2. If an operand of `op` has type `t`, let `operand_first` be the first
 ///      such operand. Then`v == operand_first`.
-///   3. Otherwise, v is a newly created `linalg::InitTensorOp` with:
+///   3. Otherwise, v is a newly created `tensor::EmptyOp` with:
 ///        a. Static and dynamic dims extracted from the first operand of `op`.
 ///        b. Elemental type equal to the elemental type of `t`.
 ///
@@ -71,8 +71,8 @@
     auto staticShape = llvm::to_vector<4>(rankedTensorType.getShape());
     auto dynamicShape = linalg::getDynOperands(loc, firstOperand, b);
 
-    res.push_back(b.create<linalg::InitTensorOp>(
-        loc, dynamicShape, staticShape, rankedTensorType.getElementType()));
+    res.push_back(b.create<tensor::EmptyOp>(
+        loc, staticShape, rankedTensorType.getElementType(), dynamicShape));
   }
   return res;
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusePadOpWithLinalgProducer.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusePadOpWithLinalgProducer.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/FusePadOpWithLinalgProducer.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/FusePadOpWithLinalgProducer.cpp
@@ -76,14 +76,14 @@
     // Create the tensor of same size as output of the pad op.
     RankedTensorType padResultType = padOp.getResultType();
     auto resultSizes = getAsOpFoldResult(resultShape[0]);
-    auto initTensor = rewriter.create<linalg::InitTensorOp>(
+    auto emptyTensor = rewriter.create<tensor::EmptyOp>(
         loc, resultSizes, padResultType.getElementType());
 
     // Fill the tensor with the pad value.
     // TODO: There is an option to fill only the boundaries. For now just
     // filling the whole tensor.
     auto fillTensor =
-        rewriter.create<linalg::FillOp>(loc, padValue, initTensor.getResult());
+        rewriter.create<linalg::FillOp>(loc, padValue, emptyTensor.getResult());
 
     // Construct a slice of the fill result that is to be replaced with the
     // result of the generic op. The low pad values are the offsets, the size of
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -431,9 +431,9 @@
   llvm::append_range(packedShape, transposedTensorType->getShape());
   auto packedTensorType = RankedTensorType::get(
       packedShape, transposedTensorType->getElementType());
-  Value packedTensor = b.create<linalg::InitTensorOp>(
-      loc, dynamicTensorSizes, packedTensorType.getShape(),
-      packedTensorType.getElementType());
+  Value packedTensor = b.create<tensor::EmptyOp>(
+      loc, packedTensorType.getShape(), packedTensorType.getElementType(),
+      dynamicTensorSizes);
 
   // Clone the operations involved in the backward slice, iteratively stepping
   // into the loops that we encounter.
@@ -543,11 +543,10 @@
 
   // Transpose the packed tensor back to the original storage order.
   if (!transposeVector.empty()) {
-    Value initTensor =
-        b.create<InitTensorOp>(loc, ValueRange{}, paddedTensorType.getShape(),
-                               paddedTensorType.getElementType());
+    Value emptyTensor = b.create<tensor::EmptyOp>(
+        loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
     transposeOps.push_back(
-        makeTransposeOp(b, loc, newResult, initTensor, transposeVector));
+        makeTransposeOp(b, loc, newResult, emptyTensor, transposeVector));
     newResult = transposeOps.back()->getResult(0);
   }
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp
@@ -9,52 +9,52 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
 namespace mlir {
-#define GEN_PASS_DEF_LINALGINITTENSORTOALLOCTENSOR
+#define GEN_PASS_DEF_EMPTYTENSORTOALLOCTENSOR
 #include "mlir/Dialect/Linalg/Passes.h.inc"
 } // namespace mlir
 
 using namespace mlir;
 using namespace mlir::bufferization;
-using namespace mlir::linalg;
+using namespace mlir::tensor;
 
 namespace {
-struct InitTensorLoweringPattern : public OpRewritePattern<InitTensorOp> {
-  using OpRewritePattern<InitTensorOp>::OpRewritePattern;
+struct EmptyTensorLoweringPattern : public OpRewritePattern<tensor::EmptyOp> {
+  using OpRewritePattern<tensor::EmptyOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(InitTensorOp op,
+  LogicalResult matchAndRewrite(tensor::EmptyOp op,
                                 PatternRewriter &rewriter) const override {
-    rewriter.replaceOpWithNewOp<bufferization::AllocTensorOp>(op, op.getType(),
-                                                              op.getSizes());
+    rewriter.replaceOpWithNewOp<bufferization::AllocTensorOp>(
+        op, op.getType(), op.getDynamicSizes());
     return success();
   }
 };
 
-struct LinalgInitTensorToAllocTensor
-    : public impl::LinalgInitTensorToAllocTensorBase<
-          LinalgInitTensorToAllocTensor> {
-  LinalgInitTensorToAllocTensor() = default;
+struct EmptyTensorToAllocTensor
+    : public impl::EmptyTensorToAllocTensorBase<EmptyTensorToAllocTensor> {
+  EmptyTensorToAllocTensor() = default;
 
   void runOnOperation() override;
 
   void getDependentDialects(DialectRegistry &registry) const override {
     registry
-        .insert<linalg::LinalgDialect, bufferization::BufferizationDialect>();
+        .insert<tensor::TensorDialect, bufferization::BufferizationDialect>();
   }
 };
 } // namespace
 
-void LinalgInitTensorToAllocTensor::runOnOperation() {
+void EmptyTensorToAllocTensor::runOnOperation() {
   Operation *op = getOperation();
   RewritePatternSet patterns(op->getContext());
-  patterns.insert<InitTensorLoweringPattern>(op->getContext());
+  patterns.insert<EmptyTensorLoweringPattern>(op->getContext());
   if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
     signalPassFailure();
 }
 
-std::unique_ptr<Pass> mlir::createLinalgInitTensorToAllocTensorPass() {
-  return std::make_unique<LinalgInitTensorToAllocTensor>();
+std::unique_ptr<Pass> mlir::createEmptyTensorToAllocTensorPass() {
+  return std::make_unique<EmptyTensorToAllocTensor>();
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp
@@ -196,20 +196,20 @@
           b.getAffineDimExpr(dim < insertSplitDimension ? dim : dim + 1));
     }
   }
-  Value initOrAllocTensor;
+  Value emptyOrAllocTensor;
   if (useAlloc) {
-    initOrAllocTensor = b.create<bufferization::AllocTensorOp>(
+    emptyOrAllocTensor = b.create<bufferization::AllocTensorOp>(
         loc,
         RankedTensorType::get(newOutputShape,
                               op.getRegionOutputArgs()[0].getType()),
         ValueRange{});
   } else {
-    initOrAllocTensor = b.create<linalg::InitTensorOp>(
+    emptyOrAllocTensor = b.create<tensor::EmptyOp>(
         loc, newOutputShape, op.getRegionOutputArgs()[0].getType());
   }
   Value constantOp = b.create<arith::ConstantOp>(loc, identity);
   Value identityTensor =
-      b.create<linalg::FillOp>(op->getLoc(), constantOp, initOrAllocTensor)
+      b.create<linalg::FillOp>(op->getLoc(), constantOp, emptyOrAllocTensor)
           .getResult(0);
 
   newMaps.push_back(AffineMap::get(oldOutputMap.getNumDims() + 1, 0, outputExpr,
@@ -225,7 +225,7 @@
   // Create the new op matching the original op with an extra parallel
   // dimension.
   GenericOp genericOp = b.create<GenericOp>(
-      loc, TypeRange({initOrAllocTensor.getType()}), newInputs,
+      loc, TypeRange({emptyOrAllocTensor.getType()}), newInputs,
       ValueRange({identityTensor}), newMaps, newIteratorTypes);
   b.inlineRegionBefore(op->getRegion(0), genericOp.getRegion(),
                        genericOp.getRegion().begin());
@@ -259,9 +259,10 @@
       });
   b.replaceOp(op, reduction.getResults());
 
-  return SplitReductionResult{
-      initOrAllocTensor.getDefiningOp(), identityTensor.getDefiningOp<FillOp>(),
-      cast<LinalgOp>(genericOp.getOperation()), reduction};
+  return SplitReductionResult{emptyOrAllocTensor.getDefiningOp(),
+                              identityTensor.getDefiningOp<FillOp>(),
+                              cast<LinalgOp>(genericOp.getOperation()),
+                              reduction};
 }
 
 /// Rewrite f(i, j, k, ...) into f(i, j, k * ratio + kk, ...)
@@ -357,7 +358,7 @@
   // TODO: generalize when multi-reduction support is available.
   SmallVector<Value> newOutputs;
   newOutputs.reserve(op.getNumOutputs());
-  SmallVector<Operation *> initOrAllocTensorOps;
+  SmallVector<Operation *> emptyOrAllocTensorOps;
   SmallVector<linalg::FillOp> fillOps;
   fillOps.reserve(op.getNumOutputs());
   for (auto it : llvm::zip(op.outputs(), neutralElements)) {
@@ -367,19 +368,19 @@
         reductionDimSize / splitFactor, insertSplitDimension);
     SmallVector<Value> dims =
         tensor::createDynamicDimValues(b, loc, rankedTensor);
-    Value initOrAllocTensor;
+    Value emptyOrAllocTensor;
     if (useAlloc) {
-      initOrAllocTensor =
+      emptyOrAllocTensor =
           b.create<bufferization::AllocTensorOp>(loc, newT, dims);
     } else {
-      initOrAllocTensor = b.create<linalg::InitTensorOp>(
-          loc, dims, newT.getShape(), t.getElementType());
+      emptyOrAllocTensor = b.create<tensor::EmptyOp>(loc, newT.getShape(),
+                                                     t.getElementType(), dims);
     }
     Value constantOp = b.create<arith::ConstantOp>(loc, std::get<1>(it));
     fillOps.push_back(
-        b.create<linalg::FillOp>(op->getLoc(), constantOp, initOrAllocTensor));
+        b.create<linalg::FillOp>(op->getLoc(), constantOp, emptyOrAllocTensor));
     newOutputs.push_back(fillOps.back().getResult(0));
-    initOrAllocTensorOps.push_back(initOrAllocTensor.getDefiningOp());
+    emptyOrAllocTensorOps.push_back(emptyOrAllocTensor.getDefiningOp());
   }
 
   // Step 2. Reindex / expand indexing maps.
@@ -406,7 +407,7 @@
   auto newInputs = llvm::to_vector<4>(op.inputs());
   // Add a single shape-only tensor to carry the dimensions without resorting to
   // more complex inversions.
-  newInputs.push_back(b.create<linalg::InitTensorOp>(
+  newInputs.push_back(b.create<tensor::EmptyOp>(
       loc, ArrayRef<int64_t>{reductionDimSize / splitFactor, splitFactor},
       b.getIntegerType(1)));
   // Output tensors are already good to go.
@@ -470,7 +471,7 @@
   // TODO: extend when multi-reduction support is available.
   assert(fillOps.size() == results.size() && results.size() == 1);
   b.replaceOp(op, results.front()->getResults());
-  return SplitReductionResult{initOrAllocTensorOps.front(), fillOps.front(),
+  return SplitReductionResult{emptyOrAllocTensorOps.front(), fillOps.front(),
                               cast<LinalgOp>(genericOp.getOperation()),
                               results.front()};
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -721,10 +721,9 @@
   scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
 
   tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
+  tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
   tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
   tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
-
-  InitTensorOp::getCanonicalizationPatterns(patterns, ctx);
   tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
   ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -630,7 +630,7 @@
   return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
 }
 
-/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp (to
+/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp (to
 /// initialize with pad_val) and GenericOp (to copy contents).
 LogicalResult
 PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp,
@@ -661,13 +661,13 @@
   Location loc = padOp.getLoc();
   SmallVector<Value> indices(resultShapedType.getRank(),
                              rewriter.create<arith::ConstantIndexOp>(loc, 0));
-  Value initTensor = rewriter.create<InitTensorOp>(
+  Value emptyTensor = rewriter.create<tensor::EmptyOp>(
       loc, resultShapedType.getShape(), resultShapedType.getElementType());
 
   // Initialize tensor with the pad value
   Value tmpTensor = rewriter
                         .create<linalg::FillOp>(loc, ValueRange{padValue},
-                                                ValueRange{initTensor})
+                                                ValueRange{emptyTensor})
                         .result();
 
   // Copy original contents into new tensor
@@ -725,8 +725,7 @@
   };
 
   auto resultType = padOp.getResultType();
-  // Compute size of InitTensorOp. Any combination of static/dynamic is
-  // supported.
+  // Compute size of EmptyOp. Any combination of static/dynamic is supported.
   SmallVector<Value> dynSizes;
   SmallVector<int64_t> staticSizes;
   for (unsigned dim = 0; dim < resultType.getRank(); ++dim) {
@@ -744,9 +743,9 @@
   }
 
   // Init tensor and fill it with padding.
-  Value init = rewriter.create<InitTensorOp>(
-      padOp.getLoc(), dynSizes, staticSizes, resultType.getElementType());
-  Value fill = createFillOrGenerateOp(rewriter, padOp, init, dynSizes);
+  Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+      padOp.getLoc(), staticSizes, resultType.getElementType(), dynSizes);
+  Value fill = createFillOrGenerateOp(rewriter, padOp, emptyTensor, dynSizes);
 
   // Try optimize the copy of source.
   if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded())
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -686,7 +686,7 @@
   return result;
 }
 
-/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
 /// InsertSliceOp. For now, only constant padding values are supported.
 /// If there is enough static type information, TransferReadOps and
 /// TransferWriteOps may be generated instead of InsertSliceOps.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -338,7 +338,7 @@
 
 /// Returns true if tensor materializes uninitialized into the computation.
 static bool isMaterializing(Value val) {
-  return val.getDefiningOp<linalg::InitTensorOp>() ||
+  return val.getDefiningOp<tensor::EmptyOp>() ||
          val.getDefiningOp<bufferization::AllocTensorOp>();
 }
 
diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
@@ -19,6 +19,7 @@
   Core
 
   LINK_LIBS PUBLIC
+  MLIRAffineDialect
   MLIRArithDialect
   MLIRArithUtils
   MLIRCastInterfaces
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -416,6 +416,282 @@
   results.add<DimOfCastOp>(context);
 }
 
+//===----------------------------------------------------------------------===//
+// EmptyOp
+//===----------------------------------------------------------------------===//
+
+void EmptyOp::build(OpBuilder &builder, OperationState &result,
+                    ArrayRef<int64_t> staticShape, Type elementType) {
+  assert(all_of(staticShape,
+                [](int64_t sz) { return !ShapedType::isDynamic(sz); }) &&
+         "expected only static sizes");
+  build(builder, result, staticShape, elementType, {});
+}
+
+void EmptyOp::build(OpBuilder &builder, OperationState &result,
+                    ArrayRef<int64_t> staticShape, Type elementType,
+                    ValueRange dynamicSizes) {
+  auto tensorType = RankedTensorType::get(staticShape, elementType);
+  build(builder, result, tensorType, dynamicSizes);
+}
+
+void EmptyOp::build(OpBuilder &builder, OperationState &result,
+                    ArrayRef<OpFoldResult> sizes, Type elementType) {
+  SmallVector<int64_t> staticShape;
+  SmallVector<Value> dynamicSizes;
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticShape,
+                             ShapedType::kDynamicSize);
+  build(builder, result, staticShape, elementType, dynamicSizes);
+}
+
+LogicalResult EmptyOp::verify() {
+  if (getType().getNumDynamicDims() != getDynamicSizes().size())
+    return emitOpError("incorrect number of dynamic sizes, has ")
+           << getDynamicSizes().size() << ", expected "
+           << getType().getNumDynamicDims();
+  return success();
+}
+
+LogicalResult
+EmptyOp::reifyResultShapes(OpBuilder &builder,
+                           ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
+  reifiedReturnShapes.resize(1, SmallVector<Value>(getType().getRank()));
+  unsigned ctr = 0;
+  for (int64_t i = 0; i < getType().getRank(); ++i) {
+    if (getType().isDynamicDim(i)) {
+      reifiedReturnShapes[0][i] = getDynamicSizes()[ctr++];
+    } else {
+      reifiedReturnShapes[0][i] =
+          builder.create<arith::ConstantIndexOp>(getLoc(), i);
+    }
+  }
+  return success();
+}
+
+Value EmptyOp::getDynamicSize(unsigned idx) {
+  assert(getType().isDynamicDim(idx) && "expected dynamic dim");
+  unsigned ctr = 0;
+  for (int64_t i = 0; i < static_cast<int64_t>(idx); ++i)
+    if (getType().isDynamicDim(i))
+      ++ctr;
+  return getDynamicSizes()[ctr];
+}
+
+SmallVector<OpFoldResult> EmptyOp::getMixedSizes() {
+  SmallVector<OpFoldResult> result;
+  unsigned ctr = 0;
+  OpBuilder b(getContext());
+  for (int64_t i = 0; i < getType().getRank(); ++i) {
+    if (getType().isDynamicDim(i)) {
+      result.push_back(getDynamicSizes()[ctr++]);
+    } else {
+      result.push_back(b.getIndexAttr(getType().getShape()[i]));
+    }
+  }
+  return result;
+}
+
+namespace {
+/// Change the type of the result of a `tensor.empty` by making the result
+/// type statically sized along dimensions that in the original operation were
+/// defined as dynamic, but the size was defined using a `constant` op. For
+/// example
+///
+///  %c5 = arith.constant 5: index
+///  %0 = tensor.empty(%arg0, %c5) : tensor<?x?xf32>
+///
+///  to
+///
+///  %0 = tensor.empty(%arg0) : tensor<?x5xf32>
+struct ReplaceEmptyTensorStaticShapeDims : OpRewritePattern<EmptyOp> {
+  using OpRewritePattern<EmptyOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(EmptyOp op,
+                                PatternRewriter &rewriter) const override {
+    SmallVector<int64_t> staticShape(op.getType().getShape().begin(),
+                                     op.getType().getShape().end());
+    SmallVector<Value> dynamicSizes;
+
+    // Compute new static and dynamic sizes.
+    unsigned ctr = 0;
+    bool changedType = false;
+    for (int64_t i = 0; i < op.getType().getRank(); ++i) {
+      if (op.getType().isDynamicDim(i)) {
+        Value dynamicSize = op.getDynamicSizes()[ctr++];
+        Optional<int64_t> cst = getConstantIntValue(dynamicSize);
+        if (cst.hasValue()) {
+          staticShape[i] = *cst;
+          changedType = true;
+        } else {
+          dynamicSizes.push_back(dynamicSize);
+        }
+      }
+    }
+
+    // Stop here if no dynamic size was promoted to static.
+    if (!changedType)
+      return failure();
+
+    auto tensorType = RankedTensorType::get(
+        staticShape, op.getType().getElementType(), op.getType().getEncoding());
+    auto newOp =
+        rewriter.create<EmptyOp>(op.getLoc(), tensorType, dynamicSizes);
+    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, op.getType(), newOp);
+    return success();
+  }
+};
+
+/// `tensor.empty` does not define any tensor contents, so a slice of a
+/// `tensor.empty` can be canonicalized to a smaller `tensor.empty`.
+struct FoldEmptyTensorWithExtractSliceOp
+    : public OpRewritePattern<ExtractSliceOp> {
+  using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ExtractSliceOp sliceOp,
+                                PatternRewriter &rewriter) const override {
+    if (!sliceOp.getSource().getDefiningOp<EmptyOp>())
+      return failure();
+
+    // ExtractSliceOp may be rank-reducing; its dynamic sizes must be
+    // preserved as well as its result type.
+    auto tensorType = RankedTensorType::get(sliceOp.getType().getShape(),
+                                            sliceOp.getType().getElementType(),
+                                            sliceOp.getType().getEncoding());
+    rewriter.replaceOpWithNewOp<EmptyOp>(sliceOp, tensorType,
+                                         sliceOp.getSizes());
+    return success();
+  }
+};
+
+template <typename ReshapeOp>
+struct FoldEmptyTensorWithReshapeOp : public OpRewritePattern<ReshapeOp> {
+  using OpRewritePattern<ReshapeOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ReshapeOp reshapeOp,
+                                PatternRewriter &rewriter) const override {
+    if (!reshapeOp.getSrc().template getDefiningOp<EmptyOp>())
+      return failure();
+    Location loc = reshapeOp.getLoc();
+    ReifiedRankedShapedTypeDims resultShapes;
+    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+        cast<ReifyRankedShapedTypeOpInterface>(reshapeOp.getOperation());
+    if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
+                                                          resultShapes)) ||
+        !llvm::hasSingleElement(resultShapes))
+      return failure();
+    // TODO: Do not drop tensor type encoding.
+    Value emptyTensor =
+        rewriter.create<EmptyOp>(loc, getAsOpFoldResult(resultShapes[0]),
+                                 reshapeOp.getResultType().getElementType());
+    if (emptyTensor.getType() != reshapeOp.getResultType()) {
+      rewriter.replaceOpWithNewOp<tensor::CastOp>(
+          reshapeOp, reshapeOp.getResultType(), emptyTensor);
+    } else {
+      rewriter.replaceOp(reshapeOp, emptyTensor);
+    }
+    return success();
+  }
+};
+
+struct FoldEmptyTensorWithDimOp : public OpRewritePattern<DimOp> {
+  using OpRewritePattern<DimOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
+                                PatternRewriter &rewriter) const override {
+    Optional<int64_t> maybeConstantIndex = dimOp.getConstantIndex();
+    auto emptyTensorOp = dimOp.getSource().getDefiningOp<EmptyOp>();
+    if (!emptyTensorOp || !maybeConstantIndex)
+      return failure();
+    if (!emptyTensorOp.getType().isDynamicDim(*maybeConstantIndex))
+      return failure();
+    rewriter.replaceOp(dimOp,
+                       emptyTensorOp.getDynamicSize(*maybeConstantIndex));
+    return success();
+  }
+};
+
+/// Canonicalize
+///
+/// ```mlir
+///   %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
+///   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x?xf32>
+/// ```
+///
+/// into
+///
+/// ```mlir
+///   %0 = tensor.empty(%d1) : tensor<4x?xf32>
+/// ```
+///
+/// This assumes the input program is correct in terms of its shape. So it is
+/// safe to assume that `%d0` is in fact 4.
+struct FoldEmptyTensorWithCastOp : public OpRewritePattern<CastOp> {
+  using OpRewritePattern<CastOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CastOp castOp,
+                                PatternRewriter &rewriter) const override {
+    if (!canFoldIntoProducerOp(castOp))
+      return failure();
+    auto producer = castOp.getSource().getDefiningOp<EmptyOp>();
+    if (!producer)
+      return failure();
+
+    auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>();
+    ArrayRef<int64_t> resultShape = resultType.getShape();
+    SmallVector<OpFoldResult> currMixedSizes = producer.getMixedSizes();
+    SmallVector<OpFoldResult> newMixedSizes;
+    newMixedSizes.reserve(currMixedSizes.size());
+    assert(resultShape.size() == currMixedSizes.size() &&
+           "mismatch in result shape and sizes of empty op");
+    for (auto it : llvm::zip(resultShape, currMixedSizes)) {
+      int64_t newDim = std::get<0>(it);
+      OpFoldResult currDim = std::get<1>(it);
+      // Case 1: The empty tensor dim is static. Check that the tensor cast
+      // result dim matches.
+      if (auto attr = currDim.dyn_cast<Attribute>()) {
+        if (ShapedType::isDynamic(newDim) ||
+            newDim != attr.cast<IntegerAttr>().getInt()) {
+          // Something is off, the cast result shape cannot be more dynamic
+          // than the empty tensor result shape (enforced by
+          // `canFoldIntoProducer`). Abort for now.
+          return rewriter.notifyMatchFailure(
+              producer, "mismatch in static value of shape of empty tensor "
+                        "result and cast result");
+        }
+        newMixedSizes.push_back(attr);
+        continue;
+      }
+
+      // Case 2 : The tensor cast shape is static, but empty tensor result
+      // shape is dynamic.
+      if (!ShapedType::isDynamic(newDim)) {
+        newMixedSizes.push_back(rewriter.getIndexAttr(newDim));
+        continue;
+      }
+
+      // Case 3 : The tensor cast shape is dynamic and empty tensor result
+      // shape is dynamic. Use the dynamic value from the empty tensor op.
+      newMixedSizes.push_back(currDim);
+    }
+
+    // TODO: Do not drop tensor encoding.
+    rewriter.replaceOpWithNewOp<EmptyOp>(castOp, newMixedSizes,
+                                         resultType.getElementType());
+    return success();
+  }
+};
+
+} // namespace
+
+void EmptyOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                          MLIRContext *context) {
+  results.add<FoldEmptyTensorWithCastOp, FoldEmptyTensorWithDimOp,
+              FoldEmptyTensorWithExtractSliceOp,
+              FoldEmptyTensorWithReshapeOp<tensor::ExpandShapeOp>,
+              FoldEmptyTensorWithReshapeOp<tensor::CollapseShapeOp>,
+              ReplaceEmptyTensorStaticShapeDims>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // ExtractOp
 //===----------------------------------------------------------------------===//
@@ -430,8 +706,8 @@
 }
 
 OpFoldResult ExtractOp::fold(ArrayRef<Attribute> operands) {
-  // If this is a splat elements attribute, simply return the value. All of the
-  // elements of a splat attribute are the same.
+  // If this is a splat elements attribute, simply return the value. All of
+  // the elements of a splat attribute are the same.
   if (Attribute tensor = operands.front())
     if (auto splatTensor = tensor.dyn_cast<SplatElementsAttr>())
       return splatTensor.getSplatValue<Attribute>();
@@ -457,8 +733,8 @@
         stride *= tensorType.getDimSize(i);
       flatIndex += indices[i] * stride;
     }
-    // Prevent out of bounds accesses. This can happen in invalid code that will
-    // never execute.
+    // Prevent out of bounds accesses. This can happen in invalid code that
+    // will never execute.
     if (static_cast<int>(fromElementsOp.getElements().size()) <= flatIndex ||
         flatIndex < 0)
       return {};
@@ -515,7 +791,8 @@
 //
 // to just %element.
 //
-// Consider expanding this to a template and handle all tensor cast operations.
+// Consider expanding this to a template and handle all tensor cast
+// operations.
 struct ExtractElementFromIndexCast
     : public OpRewritePattern<tensor::ExtractOp> {
   using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
@@ -671,8 +948,8 @@
 }
 
 LogicalResult GenerateOp::verify() {
-  // Ensure that the tensor type has as many dynamic dimensions as are specified
-  // by the operands.
+  // Ensure that the tensor type has as many dynamic dimensions as are
+  // specified by the operands.
   RankedTensorType resultTy = getType().cast<RankedTensorType>();
   if (getNumOperands() != resultTy.getNumDynamicDims())
     return emitError("must have as many index operands as dynamic extents "
@@ -908,7 +1185,8 @@
                                             getReassociationIndices());
 }
 
-/// Compute the RankedTensorType obtained by applying `reassociation` to `type`.
+/// Compute the RankedTensorType obtained by applying `reassociation` to
+/// `type`.
 static RankedTensorType
 computeTensorReshapeCollapsedType(RankedTensorType type,
                                   ArrayRef<AffineMap> reassociation) {
@@ -1006,8 +1284,8 @@
   }
 };
 
-/// Reshape of a FromElements can be replaced with a FromElements of the result
-/// type
+/// Reshape of a FromElements can be replaced with a FromElements of the
+/// result type
 template <typename TensorReshapeOp>
 struct FoldReshapeWithFromElements : OpRewritePattern<TensorReshapeOp> {
   using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
@@ -1097,8 +1375,8 @@
     ShapedType sourceShapedTensorType, ArrayRef<int64_t> staticOffsets,
     ArrayRef<int64_t> staticSizes, ArrayRef<int64_t> staticStrides) {
   // An extract_slice op may specify only a leading subset of offset/sizes/
-  // strides in which case we complete with offset=0, sizes from memref type and
-  // strides=1.
+  // strides in which case we complete with offset=0, sizes from memref type
+  // and strides=1.
   assert(static_cast<int64_t>(staticSizes.size()) ==
              sourceShapedTensorType.getRank() &&
          "unexpected staticSizes not equal to rank of source");
@@ -1122,8 +1400,8 @@
 }
 
 /// If the rank is reduced (i.e. the desiredResultRank is smaller than the
-/// number of sizes), drop as many size 1 as needed to produce an inferred type
-/// with the desired rank.
+/// number of sizes), drop as many size 1 as needed to produce an inferred
+/// type with the desired rank.
 ///
 /// Note that there may be multiple ways to compute this rank-reduced type:
 ///   e.g. 1x6x1 can rank-reduce to either 1x6 or 6x1 2-D tensors.
@@ -1210,8 +1488,8 @@
   build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
 }
 
-/// Build an ExtractSliceOp with mixed static and dynamic entries packed into a
-/// Range vector.
+/// Build an ExtractSliceOp with mixed static and dynamic entries packed into
+/// a Range vector.
 void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
                            ArrayRef<Range> ranges,
                            ArrayRef<NamedAttribute> attrs) {
@@ -1219,8 +1497,8 @@
   build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
 }
 
-/// Build an ExtractSliceOp with dynamic entries and custom result type. If the
-/// type passed is nullptr, it is inferred.
+/// Build an ExtractSliceOp with dynamic entries and custom result type. If
+/// the type passed is nullptr, it is inferred.
 void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
                            RankedTensorType resultType, Value source,
                            ValueRange offsets, ValueRange sizes,
@@ -1393,9 +1671,9 @@
   }
 }
 
-/// Fold arith.constant and tensor.extract_slice into arith.constant. The folded
-/// operation might introduce more constant data; Users can control their
-/// heuristics by the control function.
+/// Fold arith.constant and tensor.extract_slice into arith.constant. The
+/// folded operation might introduce more constant data; Users can control
+/// their heuristics by the control function.
 class ConstantOpExtractSliceFolder final
     : public OpRewritePattern<ExtractSliceOp> {
 public:
@@ -1529,8 +1807,8 @@
   for (OpFoldResult ofr : op.getMixedOffsets())
     if (getConstantIntValue(ofr) != static_cast<int64_t>(0))
       return failure();
-  // Rank-reducing noops only need to inspect the leading dimensions: llvm::zip
-  // is appropriate.
+  // Rank-reducing noops only need to inspect the leading dimensions:
+  // llvm::zip is appropriate.
   auto shape = shapedType.getShape();
   for (auto it : llvm::zip(op.getMixedSizes(), shape))
     if (getConstantIntValue(std::get<0>(it)) != std::get<1>(it))
@@ -1541,8 +1819,8 @@
   return success();
 }
 
-/// If we have an ExtractSliceOp consuming an InsertSliceOp with the same slice,
-/// we can return the InsertSliceOp's source directly.
+/// If we have an ExtractSliceOp consuming an InsertSliceOp with the same
+/// slice, we can return the InsertSliceOp's source directly.
 // TODO: This only checks the immediate producer; extend to go up the
 // insert/extract chain if the slices are disjoint.
 static Value foldExtractAfterInsertSlice(ExtractSliceOp extractOp) {
@@ -1635,7 +1913,8 @@
                     ArrayAttr staticOffsets, ArrayAttr staticSizes,
                     ArrayAttr staticStrides,
                     ShapedType *expectedType = nullptr) {
-  // insert_slice is the inverse of extract_slice, use the same type inference.
+  // insert_slice is the inverse of extract_slice, use the same type
+  // inference.
   RankedTensorType expected = ExtractSliceOp::inferResultType(
       dstType, extractFromI64ArrayAttr(staticOffsets),
       extractFromI64ArrayAttr(staticSizes),
@@ -1759,9 +2038,9 @@
     Value toInsert = insertSliceOp.getSource();
     if (sourceType != insertSliceOp.getSourceType()) {
       OpBuilder::InsertionGuard g(rewriter);
-      // The only difference between InsertSliceOp and ParallelInsertSliceOp is
-      // the the insertion point is just before the ParallelCombiningOp in the
-      // parallel case.
+      // The only difference between InsertSliceOp and ParallelInsertSliceOp
+      // is the the insertion point is just before the ParallelCombiningOp in
+      // the parallel case.
       if (std::is_same<InsertOpTy, ParallelInsertSliceOp>::value)
         rewriter.setInsertionPoint(insertSliceOp->getParentOp());
       toInsert = rewriter.create<tensor::CastOp>(insertSliceOp.getLoc(),
@@ -1774,9 +2053,9 @@
   }
 };
 
-/// Fold tensor_casts with insert_slice operations. If the source or destination
-/// tensor is a tensor_cast that removes static type information, the cast is
-/// folded into the insert_slice operation. E.g.:
+/// Fold tensor_casts with insert_slice operations. If the source or
+/// destination tensor is a tensor_cast that removes static type information,
+/// the cast is folded into the insert_slice operation. E.g.:
 ///
 /// ```mlir
 ///   %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
@@ -2175,7 +2454,8 @@
 ///   5) the tensor::PadOps do not have common padding dimensions,
 ///   6) one tensor::ExtractSliceOp, tensor::PadOp pair has zero-padding and
 ///      zero-offset for every dimension.
-///   7) the tensor::ExtractSliceOp sizes match the source tensor sizes for the
+///   7) the tensor::ExtractSliceOp sizes match the source tensor sizes for
+///   the
 ///      padded source dimensions.
 ///
 /// Example:
@@ -2276,11 +2556,11 @@
           padOp, "cannot find zero-offset and zero-padding pair");
     }
 
-    // 7) Combine the sizes of the two tensor::ExtractSliceOps. Take the size of
-    // the outer tensor::ExtractSliceOp for the dimensions padded by the outer
-    // tensor::PadOp and fail if the size of the inner tensor::ExtractSliceOp
-    // does not match the size of the padded dimension. Otherwise, take the size
-    // of the inner tensor::ExtractSliceOp.
+    // 7) Combine the sizes of the two tensor::ExtractSliceOps. Take the size
+    // of the outer tensor::ExtractSliceOp for the dimensions padded by the
+    // outer tensor::PadOp and fail if the size of the inner
+    // tensor::ExtractSliceOp does not match the size of the padded dimension.
+    // Otherwise, take the size of the inner tensor::ExtractSliceOp.
     SmallVector<OpFoldResult> newSizes = innerSliceOp.getMixedSizes();
     for (auto &en : enumerate(newSizes)) {
       if (!outerDims.test(en.index()))
@@ -2306,8 +2586,8 @@
         newHighPad[en.index()] = outerPadOp.getMixedHighPad()[en.index()];
     }
 
-    // Create a new tensor::ExtractSliceOp, tensor::PadOp pair that performs the
-    // two paddings in one step.
+    // Create a new tensor::ExtractSliceOp, tensor::PadOp pair that performs
+    // the two paddings in one step.
     auto newSliceOp = rewriter.create<ExtractSliceOp>(
         padOp.getLoc(), outerSliceOp.getSource(), newOffsets, newSizes,
         innerSliceOp.getMixedStrides());
@@ -2397,8 +2677,8 @@
   result.addAttributes(attrs);
 }
 
-/// Build an ParallelInsertSliceOp with mixed static and dynamic entries packed
-/// into a Range vector.
+/// Build an ParallelInsertSliceOp with mixed static and dynamic entries
+/// packed into a Range vector.
 void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
                                   Value source, Value dest,
                                   ArrayRef<Range> ranges,
@@ -2485,7 +2765,8 @@
   if (!constOperand.isa_and_nonnull<IntegerAttr, FloatAttr>())
     return {};
 
-  // SplatElementsAttr::get treats single value for second arg as being a splat.
+  // SplatElementsAttr::get treats single value for second arg as being a
+  // splat.
   return SplatElementsAttr::get(getType(), {constOperand});
 }
 
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -31,9 +31,9 @@
 
     auto padOp = cast<PadOp>(op);
     SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
-    Value initTensor = b.create<linalg::InitTensorOp>(
+    Value emptyTensor = b.create<EmptyOp>(
         op->getLoc(), mixedSizes, padOp.getResultType().getElementType());
-    return {initTensor};
+    return {emptyTensor};
   }
 
   SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
diff --git a/mlir/python/mlir/dialects/_linalg_ops_ext.py b/mlir/python/mlir/dialects/_linalg_ops_ext.py
--- a/mlir/python/mlir/dialects/_linalg_ops_ext.py
+++ b/mlir/python/mlir/dialects/_linalg_ops_ext.py
@@ -20,44 +20,6 @@
     return False
 
 
-class InitTensorOp:
-  """Extends the linalg.init_tensor op."""
-
-  def __init__(self,
-               sizes: Union[Sequence[int], Sequence[Value]],
-               element_type: Type,
-               *,
-               loc=None,
-               ip=None):
-    """Constructs an `init_tensor` with either static or dynamic sizes."""
-    context = get_default_loc_context(loc)
-    operands = []
-    attributes = {}
-    # TODO: Refactor the InitTensorOp to take an element type attribute and
-    # then use normal result type inference, unifying the Python and C++ side
-    # with a standard mechanism (versus stashing that in builders).
-    if sizes and isinstance(sizes[0], Value):
-      # Dynamic sizes.
-      operands.extend(sizes)
-      static_size_ints = [-1] * len(sizes)
-      result_type = RankedTensorType.get(static_size_ints, element_type)
-    else:
-      # Static sizes.
-      result_type = RankedTensorType.get(sizes, element_type)
-      static_size_ints = sizes
-
-    i64_type = IntegerType.get_signless(64)
-    attributes["static_sizes"] = ArrayAttr.get(
-        [IntegerAttr.get(i64_type, s) for s in static_size_ints],
-        context=context)
-    op = self.build_generic(results=[result_type],
-                            operands=operands,
-                            attributes=attributes,
-                            loc=loc,
-                            ip=ip)
-    OpView.__init__(self, op)
-
-
 class StructuredOpMixin:
   """All structured ops use the same mixin class."""
 
diff --git a/mlir/python/mlir/dialects/_tensor_ops_ext.py b/mlir/python/mlir/dialects/_tensor_ops_ext.py
new file mode 100644
--- /dev/null
+++ b/mlir/python/mlir/dialects/_tensor_ops_ext.py
@@ -0,0 +1,42 @@
+#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+#  See https://llvm.org/LICENSE.txt for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+try:
+  from ..ir import *
+except ImportError as e:
+  raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Any, Optional, Sequence, Union
+from ._ods_common import get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values
+
+
+class EmptyOp:
+  """Extends the tensor.empty op."""
+
+  def __init__(self,
+               sizes: Sequence[Union[int, Value]],
+               element_type: Type,
+               *,
+               loc=None,
+               ip=None):
+    """Constructs an `empty` with mixed static/dynamic sizes."""
+    # TODO: Refactor the EmptyOp to take an element type attribute and
+    # then use normal result type inference, unifying the Python and C++ side
+    # with a standard mechanism (versus stashing that in builders).
+    dynamic_sizes = []
+    static_sizes = []
+    for s in sizes:
+      if isinstance(s, int):
+        static_sizes.append(s)
+      else:
+        static_sizes.append(-1)
+        dynamic_sizes.append(s)
+    result_type = RankedTensorType.get(static_sizes, element_type)
+    op = self.build_generic(
+        results=[result_type],
+        operands=dynamic_sizes,
+        attributes={},
+        loc=loc,
+        ip=ip)
+    OpView.__init__(self, op)
diff --git a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir
--- a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir
+++ b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir
@@ -6,7 +6,7 @@
 // CHECK-LABEL:   func @generalize_pad_tensor_static_shape(
 // CHECK-SAME:                                             %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
 // CHECK:           %[[C0:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK:           %[[INIT:.*]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
+// CHECK:           %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
 // CHECK:           %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
 // CHECK:           %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
 // CHECK:           return %[[PADDED]] : tensor<1x32x32x1xf32>
@@ -31,7 +31,7 @@
 // CHECK-DAG:       %[[C3:.*]] = arith.constant 3 : index
 // CHECK:           %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
 // CHECK:           %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
-// CHECK:           %[[INIT:.*]] = linalg.init_tensor [4, %[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]] : tensor<4x?x?x?xf32>
+// CHECK:           %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
 // CHECK:           %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
 // CHECK:           %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32>
 // CHECK:           return %[[PADDED]] : tensor<4x?x?x?xf32>
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL: @matmul
 func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {
   // CHECK: [[C0:%.+]] = arith.constant 0
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>)  -> (tensor<1x5x6xf32>)
@@ -16,7 +16,7 @@
 // CHECK-LABEL: @matmul_quantized
 func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) {
   // CHECK: [[C0:%.+]] = arith.constant 0
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
   // CHECK: [[ONE:%.+]] = arith.constant 1
   // CHECK: [[TWO:%.+]] = arith.constant 2
@@ -32,7 +32,7 @@
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
   // CHECK: %[[C0_0:.+]] = arith.constant 0
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>)  -> (tensor<?x5x6xf32>)
@@ -46,7 +46,7 @@
   // CHECK: %[[C2:.+]] = arith.constant 2
   // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]]
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>)  -> (tensor<1x5x?xf32>)
@@ -58,7 +58,7 @@
 // CHECK-LABEL: @matmul_dyn_independent_dim
 func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>)  -> (tensor<1x5x6xf32>)
@@ -72,12 +72,12 @@
 
 // CHECK-LABEL: @fully_connected
 func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {
-  // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
+  // CHECK: [[INITT:%.+]] = tensor.empty()
   // CHECK: [[ZERO:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]]
   // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
   // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
-  // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
+  // CHECK: [[INITB:%.+]] = tensor.empty()
   // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32>
   // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
   // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
@@ -95,12 +95,12 @@
 
 // CHECK-LABEL: @quantized_fully_connected
 func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) {
-  // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
+  // CHECK: [[INITT:%.+]] = tensor.empty()
   // CHECK: [[ZERO:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]]
   // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
   // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
-  // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
+  // CHECK: [[INITB:%.+]] = tensor.empty()
   // CHECK: [[ONE:%.+]] = arith.constant 1
   // CHECK: [[TWO:%.+]] = arith.constant 2
   // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32>
@@ -121,12 +121,12 @@
 func.func @fully_connected_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<?x6xf32>) {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6]
+  // CHECK: %[[INITT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[ZERO:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}outs(%[[INITT]]
   // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]>
   // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
-  // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6]
+  // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32>
   // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) {
   // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
@@ -142,9 +142,9 @@
 // CHECK-LABEL: @max_pool
 func.func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
   // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38
-  // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62]
+  // CHECK-DAG: [[INIT:%.+]] = tensor.empty()
   // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[INIT]]
-  // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
+  // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty()
   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>)
   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x4x32x62xf32>)
   return
@@ -156,9 +156,9 @@
   // CHECK-DAG: [[PAD:%.+]] = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
   // CHECK-DAG:   tensor.yield [[CONST]]
   // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32
-  // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
+  // CHECK-DAG: [[INIT:%.+]] = tensor.empty()
   // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}outs([[INIT]]
-  // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
+  // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty()
   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>)
   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x4x33x62xf32>)
   return
@@ -169,9 +169,9 @@
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
   // CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 4, 32, 62]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]]
-  // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [3, 3]
+  // CHECK: %[[KERNEL:.+]] = tensor.empty()
   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor<?x6x34x62xf32>, tensor<3x3xf32>) outs(%[[FILL]] : tensor<?x4x32x62xf32>)
   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<?x6x34x62xf32>)  -> (tensor<?x4x32x62xf32>)
   return
@@ -208,11 +208,11 @@
   // CHECK: [[CONST:%.+]] = arith.constant 0
   // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK: [[CONST:%.+]] = arith.constant 0
-  // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
+  // CHECK: [[POOLINIT:%.+]] = tensor.empty()
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[POOLINIT]]
-  // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4]
+  // CHECK: [[KERNEL:%.+]] = tensor.empty()
   // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>)
   // CHECK:   [[ZERO:%.0]] = arith.constant 0
   // CHECK:   [[ONE:%.+]] = arith.constant 1
@@ -269,11 +269,11 @@
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
   // CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
-  // CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62]
+  // CHECK: %[[POOLINIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[FILL:.+]] = linalg.fill
-  // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4]
+  // CHECK: %[[KERNEL:.+]] = tensor.empty()
   // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor<?x8x36x62xf32>, tensor<4x4xf32>) outs(%[[FILL]] : tensor<?x5x33x62xf32>)
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor<?x5x33x62xf32>) outs(%[[INIT]] : tensor<?x5x33x62xf32>)
   %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<?x6x34x62xf32>)  -> (tensor<?x5x33x62xf32>)
   return %0 : tensor<?x5x33x62xf32>
@@ -346,10 +346,10 @@
 func.func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
   // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
   // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
-  // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
+  // CHECK: %[[M_IN:.+]] = tensor.empty()
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
-  // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
+  // CHECK: %[[B_IN:.+]] = tensor.empty()
   // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>)
   // CHECK:   arith.addf
@@ -369,10 +369,10 @@
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
   // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
   // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
-  // CHECK: %[[M_IN:.+]] = linalg.init_tensor [%[[BATCH]], 45, 40, 28]
+  // CHECK: %[[M_IN:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
-  // CHECK: %[[B_IN:.+]] = linalg.init_tensor [%[[BATCH]], 45, 40, 28]
+  // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<?x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<?x45x40x28xf32>)
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<?x45x40x28xf32>) outs(%[[B_IN]] : tensor<?x45x40x28xf32>)
   // CHECK:   %[[ADD:.+]] = arith.addf
@@ -429,10 +429,10 @@
   // Running convolution
   // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
   // CHECK: %[[WEIGHT:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
-  // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, %[[H_OUT]], %[[W_OUT]], 28]
+  // CHECK: %[[M_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]])
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
-  // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, %[[H_OUT]], %[[W_OUT]], 28]
+  // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]])
   // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>)
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>)
   // CHECK:   %[[ADD:.+]] = arith.addf
@@ -472,10 +472,10 @@
 
 // CHECK-LABEL: @depthwise_conv
 func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
-  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
+  // CHECK: [[OUT:%.+]] = tensor.empty()
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
   // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
@@ -496,10 +496,10 @@
 func.func @depthwise_conv_dyn(%arg0 : tensor<?x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 5, 3, 11]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[CST0:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
-  // CHECK: %[[OUT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 5, 33]
+  // CHECK: %[[OUT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<?x7x5x3xf32>, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor<?x5x5x3x11xf32>)
   // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor<?x5x5x33xf32>) outs(%[[OUT]] : tensor<?x5x5x33xf32>) {
@@ -518,10 +518,10 @@
 
 // CHECK-LABEL: @depthwise_conv_strides
 func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
-  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
+  // CHECK: [[OUT:%.+]] = tensor.empty()
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
   // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
@@ -544,10 +544,10 @@
   // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK:   tensor.yield [[PADV]]
 
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
-  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512]
+  // CHECK: [[OUT:%.+]] = tensor.empty()
   // CHECK: [[C128:%.+]] = arith.constant -128
   // CHECK: [[C42:%.+]] = arith.constant 42
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
@@ -568,10 +568,10 @@
 
 // CHECK-LABEL: @depthwise_conv_quant_dilations
 func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
-  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512]
+  // CHECK: [[OUT:%.+]] = tensor.empty()
   // CHECK: [[C128:%.+]] = arith.constant -128
   // CHECK: [[C42:%.+]] = arith.constant 42
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -4,7 +4,7 @@
 
 // CHECK-LABEL: @test_abs
 func.func @test_abs(%arg0: tensor<f32>) -> tensor<f32> {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [] : tensor<f32>
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<f32>
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%arg0 : tensor<f32>) outs([[INIT]] : tensor<f32>) {
   // CHECK: ^bb0(%arg1: f32, %arg2: f32):
   // CHECK:   [[ELEMENT:%.+]] = math.absf %arg1
@@ -23,7 +23,7 @@
 
 // CHECK-LABEL: @test_abs
 func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
   // CHECK: ^bb0(%arg1: f32, %arg2: f32):
   // CHECK:   [[ELEMENT:%.+]] = math.absf %arg1
@@ -41,7 +41,7 @@
 
 // CHECK-LABEL: @test_abs
 func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3] : tensor<2x3xf32>
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32>
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) {
   // CHECK: ^bb0(%arg1: f32, %arg2: f32):
   // CHECK:   [[ELEMENT:%.+]] = math.absf %arg1
@@ -59,7 +59,7 @@
 func.func @test_abs(%arg0: tensor<?xf32>) -> tensor<?xf32> {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: linalg.generic
   // CHECK: math.absf
   %0 = "tosa.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
@@ -74,7 +74,7 @@
 func.func @test_abs_dyn(%arg0: tensor<2x?xf32>) -> tensor<2x?xf32> {
   // CHECK: %[[C1:.+]] = arith.constant 1
   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, %[[DIM]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: linalg.generic
   // CHECK: math.absf
   %0 = "tosa.abs"(%arg0) : (tensor<2x?xf32>) -> tensor<2x?xf32>
@@ -88,7 +88,7 @@
 
 // CHECK-LABEL: @test_broadcast
 func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
   // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg0
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %arg1 : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
   // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
@@ -106,7 +106,7 @@
 
 // CHECK-LABEL: @test_broadcast_swapped_args
 func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
   // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg1
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[RESHAPE]] : tensor<2xf32>, tensor<f32>) outs([[INIT]] : tensor<2xf32>) {
   // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
@@ -125,7 +125,7 @@
 
 // CHECK-LABEL: @test_multibroadcast
 func.func @test_multibroadcast(%arg0: tensor<1x3xf32>, %arg1: tensor<2x1xf32>) -> tensor<2x3xf32> {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3] : tensor<2x3xf32>
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32>
   // CHECK: [[RESHAPE1:%.+]] = tensor.collapse_shape %arg0 {{\[}}[0, 1]]
   // CHECK: [[RESHAPE2:%.+]] = tensor.collapse_shape %arg1 {{\[}}[0, 1]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) {
@@ -630,7 +630,7 @@
 // CHECK-SAME: ([[ARG0:%.+]]: tensor<1x2x3xi32>)
 func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () {
   %0 = arith.constant dense<[1, 2, 0]> : tensor<3xi32>
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3, 1]
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3x1xi32>
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>)
   // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
   // CHECK:   linalg.yield [[ARG1]]
@@ -650,7 +650,7 @@
   %0 = arith.constant dense<[1, 3, 0, 2]> : tensor<4xi32>
   // CHECK: %[[C1:.+]] = arith.constant 1
   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 4, 1, 3]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor<?x4x1x3xi32>
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor<?x4x1x3xi32>)
   // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
   // CHECK:   linalg.yield [[ARG1]]
@@ -672,7 +672,7 @@
   // CHECK: %[[DIM0:.+]] = tensor.dim %arg0, %[[C0]]
   // CHECK: %[[C1:.+]] = arith.constant 1
   // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM1]], %[[DIM0]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs([[OUT:%.+]] : tensor<?x?xf32>)
   // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32)
   // CHECK:   linalg.yield [[ARG1]]
@@ -690,7 +690,7 @@
 // CHECK-LABEL: @reduce_float
 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xf32>
 func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32>
   // CHECK: [[CST0:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>)
@@ -700,7 +700,7 @@
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xf32> into tensor<1x4xf32>
   %0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>
 
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32>
   // CHECK: [[CST0:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>)
@@ -739,7 +739,7 @@
 func.func @reduce_float_dyn(%arg0: tensor<?x5x4xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]], 4]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x4xf32>
   // CHECK: %[[CST0:.+]] = arith.constant 0.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg0 : tensor<?x5x4xf32>) outs(%[[FILL]] : tensor<?x4xf32>)
@@ -761,7 +761,7 @@
 func.func @reduce_float_dyn_nonzero_batch(%arg0: tensor<5x?x4xf32>) -> () {
   // CHECK: %[[C1:.+]] = arith.constant 1
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C1]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, %[[DYN]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32>
   // CHECK: %[[CST1:.+]] = arith.constant 1.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]]
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0 : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>)
@@ -783,7 +783,7 @@
 func.func @reduce_float_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]]
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>)
@@ -804,7 +804,7 @@
 // CHECK-LABEL: @reduce_int
 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi32>
 func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>)
@@ -814,7 +814,7 @@
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xi32> into tensor<1x4xi32>
   %0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>
 
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>)
@@ -854,7 +854,7 @@
 // CHECK-LABEL: @reduce_bool
 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi1>
 func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[CST0:%.+]] = arith.constant true
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>)
@@ -882,7 +882,7 @@
   // CHECK: [[OFFSET:%.+]] = arith.constant 0 : index
   // CHECK: [[IDX0:%.+]] = arith.constant 0 : index
   // CHECK: [[IDX1:%.+]] = arith.constant 1 : index
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<11x1xf32>
   // CHECK: [[CST:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]]
   // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]][0, 0] [5, 1] [1, 1]
@@ -894,7 +894,7 @@
   // CHECK: [[OFFSET:%.+]] = arith.constant 0 : index
   // CHECK: [[IDX0:%.+]] = arith.constant 0 : index
   // CHECK: [[IDX1:%.+]] = arith.constant 1 : index
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]
+  // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5x2xf32>
   // CHECK: [[CST:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]]
   // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]][0, 0] [5, 1] [1, 1]
@@ -915,7 +915,7 @@
   // CHECK: %[[SIZE:.+]] = tensor.dim %arg0, %[[IDX1]]
   // CHECK: %[[IDX1_2:.+]] = arith.constant 1 : index
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[IDX1_2]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [11, %[[DYN]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<11x?xf32>
   // CHECK: %[[CST:.+]] = arith.constant 0.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]]
   // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %arg0 into %[[FILL]][0, 0] [5, %[[SIZE]]] [1, 1]
@@ -936,7 +936,7 @@
   // CHECK: %[[IDX0_2:.+]] = arith.constant 0 : index
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[IDX0_2]]
   // CHECK: %[[IDX1:.+]] = arith.constant 1 : index
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]], 3]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x3xf32>
   // CHECK: %[[CST:.+]] = arith.constant 0.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]]
   // CHECK: %[[DYN1:.+]] = tensor.dim %arg0, %[[AXIS]]
@@ -955,7 +955,7 @@
 func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
   // CHECK: [[C0:%.+]] = arith.constant 19689
   // CHECK: [[C1:%.+]] = arith.constant 15
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
   // CHECK: [[C17:%.+]] = arith.constant 17
@@ -976,7 +976,7 @@
 
   // CHECK: [[C0:%.+]] = arith.constant 19689
   // CHECK: [[C1:%.+]] = arith.constant 15
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>)
   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8):
   // CHECK: [[C17:%.+]] = arith.constant 17
@@ -1008,13 +1008,13 @@
 func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 2]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
   %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>)  -> (tensor<?x2xi8>)
 
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 2]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xui8>
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xui8>)
   %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>)  -> (tensor<?x2xui8>)
 
@@ -1031,7 +1031,7 @@
   // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
   // CHECK: %[[C2:.+]] = arith.constant 2
   // CHECK: %[[DIM2:.+]] = tensor.dim %arg0, %[[C2]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, %[[DIM1]], %[[DIM2]], 32]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]])
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>)
   %0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = [1376784203 : i32], output_zp = 0 : i32, per_channel = false, scale32 = true, shift = [38 : i32]} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8>
   return
@@ -1045,7 +1045,7 @@
 func.func @rescale_ui8(%arg0 : tensor<2xui8>) -> () {
   // CHECK: [[C0:%.+]] = arith.constant 19689
   // CHECK: [[C1:%.+]] = arith.constant 15
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>)
   // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8):
   // CHECK: [[C17:%.+]] = arith.constant 17
@@ -1076,7 +1076,7 @@
 func.func @rescale_per_channel(%arg0 : tensor<3xi8>) -> (tensor<3xi8>) {
   // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]>
   // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]>
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [3]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>)
   // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
   // CHECK: [[C243:%.+]] = arith.constant 243
@@ -1128,7 +1128,7 @@
 func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, 4]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
   // CHECK-DAG:   %[[I1:.+]] = linalg.index 1
@@ -1141,7 +1141,7 @@
 
   // CHECK: %[[C1:.+]] = arith.constant 1
   // CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C1]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, 4]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
   // CHECK-DAG:   %[[I1:.+]] = linalg.index 1
@@ -1164,7 +1164,7 @@
   // CHECK: %[[D0_1:.+]] = tensor.dim %arg0, %[[C0_1]]
   // CHECK: %[[C0_2:.+]] = arith.constant 0
   // CHECK: %[[D0_2:.+]] = tensor.dim %arg0, %[[C0_2]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[D0_1]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor<?xi32>)
   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
   // CHECK-DAG:   %[[SUB1:.+]] = arith.constant 1
@@ -1183,19 +1183,19 @@
 
 // CHECK-LABEL: @tile
 func.func @tile(%arg0 : tensor<2x3xi8>) -> () {
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 2, 1, 3]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
   // CHECK:   linalg.yield %arg1 : i8
   // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1, 2], [3]]
   %0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<2x3xi8>)  -> (tensor<4x3xi8>)
 
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 2, 2, 3]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
   // CHECK:   linalg.yield %arg1 : i8
   // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
   %1 = "tosa.tile"(%arg0) {multiples = [1, 2]} : (tensor<2x3xi8>)  -> (tensor<2x6xi8>)
 
-  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2, 7, 3]
+  // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
   // CHECK:   linalg.yield %arg1 : i8
   // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
@@ -1213,7 +1213,7 @@
 func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
   // CHECK: %[[CST0:.+]] = arith.constant 0
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST0]] : tensor<?x3xi8>
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, %[[DYN]], 1, 3]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<?x3xi8>) outs(%[[INIT]] : tensor<2x?x1x3xi8>)
   // CHECK:   linalg.yield %arg1 : i8
   // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]]
@@ -1232,7 +1232,7 @@
 func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
   // CHECK: %[[CST1:.+]] = arith.constant 1
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST1]] : tensor<2x3xi8>
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 2, %[[DYN]], 3]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>)
   // CHECK:   linalg.yield %arg1 : i8
   // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]]
@@ -1340,10 +1340,10 @@
 // CHECK: #[[$MAP4:.*]] = affine_map<(d0) -> ()>
 
 func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
-  // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [2]
+  // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
   // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
   // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
-  // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [2]
+  // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
   // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
   // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
@@ -1355,10 +1355,10 @@
   // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
   %0 = "tosa.argmax"(%arg0) { axis = 0 : i64} : (tensor<3x2xi32>)  -> (tensor<2xi32>)
 
-  // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [3]
+  // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
   // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
   // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
-  // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [3]
+  // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
   // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
   // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
   // CHECK: linalg.generic {indexing_maps = [#map0, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
@@ -1390,10 +1390,10 @@
 func.func @argmax_dyn_non_axis(%arg0 : tensor<3x?xi32>) -> () {
   // CHECK: %[[CST1:.+]] = arith.constant 1
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST1]]
-  // CHECK: %[[IDX_INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+  // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
   // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
-  // CHECK: %[[VAL_INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+  // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
   // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<?xi32>, tensor<?xi32>)
@@ -1413,10 +1413,10 @@
 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
 
 func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () {
-  // CHECK: %[[IDX_INIT:.+]] = linalg.init_tensor [3]
+  // CHECK: %[[IDX_INIT:.+]] = tensor.empty()
   // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
   // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
-  // CHECK: %[[VAL_INIT:.+]] = linalg.init_tensor [3]
+  // CHECK: %[[VAL_INIT:.+]] = tensor.empty()
   // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
   // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
@@ -1434,7 +1434,7 @@
 
 // CHECK-LABEL: @gather_float
 func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 3, 2]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
   // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
   // CHECK:   %[[IDX0:.+]] = linalg.index 0
@@ -1450,7 +1450,7 @@
 func.func @gather_float_dyn(%arg0: tensor<?x3x2xf32>, %arg1: tensor<?x3xi32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 3, 2]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<?x3xi32>) outs(%[[INIT]] : tensor<?x3x2xf32>)
   // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
   // CHECK:   %[[IDX0:.+]] = linalg.index 0
@@ -1464,7 +1464,7 @@
 
 // CHECK-LABEL: @gather_int
 func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 3, 2]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
   // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: i32)
   // CHECK:   %[[IDX0:.+]] = linalg.index 0
@@ -1480,7 +1480,7 @@
 
 // CHECK-LABEL: @table8
 func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
@@ -1496,7 +1496,7 @@
 
 // CHECK-LABEL: @table16
 func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
   // CHECK: ^bb0(%arg2: i16, %arg3: i32)
   // CHECK: %[[EXT_IN:.+]] = arith.extsi %arg2
@@ -1529,7 +1529,7 @@
 func.func @table8_dyn(%arg0: tensor<?xi8>, %arg1: tensor<512xi8>) -> () {
   // CHECK: %[[CST0:.+]] = arith.constant 0
   // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<?xi8>) outs(%[[INIT]] : tensor<?xi8>)
   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
@@ -1545,7 +1545,7 @@
 
 // CHECK-LABEL: @table8_dyn_table
 func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor<?xi8>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
@@ -1561,7 +1561,7 @@
 
 // CHECK-LABEL: @resize_nearest
 func.func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
   // CHECK: %[[IDX0:.+]] = linalg.index 0
   // CHECK: %[[IDX1:.+]] = linalg.index 1
@@ -1630,7 +1630,7 @@
 
 // CHECK-LABEL: @resize_bilinear
 func.func @resize_bilinear(%input: tensor<1x2x2x1xf32>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
   // CHECK: %[[IDX0:.+]] = linalg.index 0
   // CHECK: %[[IDX1:.+]] = linalg.index 1
@@ -1712,7 +1712,7 @@
 
 // CHECK-LABEL: @resize_nearest_int
 func.func @resize_nearest_int(%input: tensor<1x2x2x1xi32>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
   // CHECK: %[[IDX0:.+]] = linalg.index 0
   // CHECK: %[[IDX1:.+]] = linalg.index 1
@@ -1780,7 +1780,7 @@
 
 // CHECK-LABEL: @resize_bilinear_int
 func.func @resize_bilinear_int(%input: tensor<1x2x2x1xi8>) -> () {
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+  // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
 
   // CHECK: %[[IDX0:.+]] = linalg.index 0
@@ -1867,7 +1867,7 @@
 func.func @resize_dyn(%input: tensor<?x2x2x1xi8>) -> () {
     // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 4, 4, 1]
+  // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic
   %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [128, 128], offset = [1, 2], stride_fp = [0. : f32, 0. : f32], offset_fp = [0. : f32, 0. : f32], shift = 8 : i32, mode = "BILINEAR" } : (tensor<?x2x2x1xi8>)  -> (tensor<?x4x4x1xi32>)
   return
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir
@@ -47,7 +47,8 @@
   %f0 = arith.constant 0.0: f32
 
   // alloc_tensor itself does not alloc but forwards to the insert_slice.
-  // InitTensorOp replaces the alloc_tensor with an inplace extract_slice.
+  // AllocTensorOpElimination replaces the alloc_tensor with an inplace
+  // extract_slice.
   // CHECK: %[[T_SUBVIEW:.*]] =  memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
   %a = bufferization.alloc_tensor(%sz) : tensor<?xf32>
 
diff --git a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
--- a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
+++ b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
@@ -135,7 +135,7 @@
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
 
-  %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+  %init = tensor.empty() : tensor<1x112x112x32xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
 
   %conv = linalg.conv_2d_nhwc_hwcf
@@ -149,7 +149,7 @@
 }
 
 // CHECK: func @conv_slice
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
 // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32>
 // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32>
 // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32>
@@ -162,7 +162,7 @@
 // The slice is not supposed to be bubbled up when it is rank-reducing.
 func.func @rank_reducing_slice(%width : index) -> tensor<1x1x1x?xf32> {
   %cst = arith.constant 1.000000e+00 : f32
-  %init = linalg.init_tensor [1, %width] : tensor<1x?xf32>
+  %init = tensor.empty(%width) : tensor<1x?xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32>
   %slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
   %expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor<?xf32> into tensor<1x1x1x?xf32>
@@ -170,7 +170,7 @@
 }
 
 // CHECK: func @rank_reducing_slice
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
 // CHECK: %[[FILL:.+]] = linalg.fill ins
 // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[SLICE]]
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -41,18 +41,18 @@
 
 #map0 = affine_map<(d0) -> (d0)>
 
-// Same as above but with linalg.init_tensor op.
+// Same as above but with tensor.empty op.
 
 // CHECK: #map = affine_map<(d0) -> (d0)>
-// CHECK-LABEL: func @init_tensor(
+// CHECK-LABEL: func @empty_tensor(
 // CHECK-SAME:      %[[IN:.*]]: tensor<?xf32>, %[[SIZE:.*]]: index)
 // CHECK-DAG:     %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<?xf32>
 // CHECK-DAG:     %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref<?xf32>
 // CHECK:         linalg.generic
 // CHECK-SAME:    ins(%[[MEMREF]] : memref<?xf32>)
 // CHECK-SAME:    outs(%[[OUT_BUF]] : memref<?xf32>) {
-func.func @init_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
-  %init = linalg.init_tensor [%size] : tensor<?xf32>
+func.func @empty_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
+  %init = tensor.empty(%size) : tensor<?xf32>
   %0 = linalg.generic {
     indexing_maps = [#map0, #map0],
     iterator_types = ["parallel"]
@@ -208,7 +208,7 @@
 // CHECK:   return %[[call]]
 func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> {
   %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1>
-  %1 = linalg.init_tensor [6] : tensor<6xi64>
+  %1 = tensor.empty() : tensor<6xi64>
   %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) {
   ^bb0(%arg1: i1, %arg2: i64):
     %4 = arith.extui %arg1 : i1 to i64
diff --git a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir
@@ -131,8 +131,8 @@
 #map0 = affine_map<(d0) -> (d0)>
 #map1 = affine_map<(d0) -> ()>
 func.func @argmax_lowering(%arg0 : tensor<?xf32>) -> tensor<i32> {
-  %init0 = linalg.init_tensor [] : tensor<f32>
-  %init1 = linalg.init_tensor [] : tensor<i32>
+  %init0 = tensor.empty() : tensor<f32>
+  %init1 = tensor.empty() : tensor<i32>
   %0:2 = linalg.generic {
     indexing_maps = [#map0, #map1, #map1],
     iterator_types = ["reduction"]}
@@ -153,8 +153,8 @@
 }
 //      CHECK: func @argmax_lowering(
 // CHECK-SAME:     %[[ARG0:.+]]: tensor<?xf32>
-//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [] : tensor<f32>
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [] : tensor<i32>
+//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<f32>
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<i32>
 //      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
 // CHECK-SAME:       outs(%[[INIT0]], %[[INIT1]] :
 //      CHECK:   return %[[GENERIC]]#1
@@ -164,7 +164,7 @@
 // Do not remove operand needed for loop dim.
 func.func @loop_dim_operand(%arg0 : tensor<?xf32>) -> tensor<i32> {
   %cst = arith.constant 0 : i32
-  %init = linalg.init_tensor [] : tensor<i32>
+  %init = tensor.empty() : tensor<i32>
   %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
@@ -188,8 +188,8 @@
 // Do not remove outs operand needed for loop bound computation.
 func.func @loop_dim_outs_operand(%arg0 : index) -> tensor<i32> {
   %cst = arith.constant 0 : i32
-  %init1 = linalg.init_tensor [%arg0] : tensor<?xi32>
-  %init = linalg.init_tensor [] : tensor<i32>
+  %init1 = tensor.empty(%arg0) : tensor<?xi32>
+  %init = tensor.empty() : tensor<i32>
   %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
   %0:2 = linalg.generic {
       indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
@@ -205,7 +205,7 @@
 }
 //      CHECK: func @loop_dim_outs_operand(
 // CHECK-SAME:     %[[ARG0:.+]]: index
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%[[ARG0]]]
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[ARG0]])
 //      CHECK:   linalg.generic
 // CHECK-SAME:       outs(%[[INIT]]
 
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -85,48 +85,6 @@
 
 // -----
 
-func.func @init_tensor_canonicalize() -> (tensor<4x5x?xf32>) {
-  %c6 = arith.constant 6 : index
-  %0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32>
-  return %0 : tensor<4x5x?xf32>
-}
-// CHECK: func @init_tensor_canonicalize
-// CHECK:   %[[T0:.+]] = linalg.init_tensor [4, 5, 6] : tensor<4x5x6xf32>
-// CHECK:   %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32>
-// CHECK:   return %[[T1]]
-
-// -----
-
-func.func @init_tensor_reshape_expansion(%arg0 : index) -> tensor<2x3x5x4x?x7xf32> {
-  %0 = linalg.init_tensor [6, 5, %arg0] : tensor<6x5x?xf32>
-  %1 = tensor.expand_shape %0 [[0, 1], [2], [3, 4, 5]]
-      : tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32>
-  return %1 : tensor<2x3x5x4x?x7xf32>
-}
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)>
-//      CHECK: func @init_tensor_reshape_expansion
-// CHECK-SAME:     %[[ARG0:.+]]: index
-// CHECK-NEXT:   %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
-// CHECK-NEXT:   %[[INIT:.+]] = linalg.init_tensor [2, 3, 5, 4, %[[D]], 7]
-// CHECK-NEXT:   return %[[INIT]]
-
-// -----
-
-func.func @init_tensor_reshape_collapse(%arg0 : index) -> tensor<6x5x?xf32> {
-  %0 = linalg.init_tensor [2, 3, 5, 4, %arg0, 7] : tensor<2x3x5x4x?x7xf32>
-  %1 = tensor.collapse_shape %0 [[0, 1], [2], [3, 4, 5]]
-      : tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32>
-  return %1 : tensor<6x5x?xf32>
-}
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)>
-//      CHECK: func @init_tensor_reshape_collapse
-// CHECK-SAME:     %[[ARG0:.+]]: index
-// CHECK-NEXT:   %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
-// CHECK-NEXT:   %[[INIT:.+]] = linalg.init_tensor [6, 5, %[[D]]]
-// CHECK-NEXT:   return %[[INIT]]
-
-// -----
-
 #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
   -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
@@ -136,7 +94,7 @@
   %0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
   %2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
-  %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+  %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
   %4, %5 = linalg.generic {
     indexing_maps = [#map, #map, #map, #map],
     iterator_types = ["parallel", "parallel", "parallel"]
@@ -157,7 +115,7 @@
 #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
   -> tensor<1x2x3xf32> {
-  %out = linalg.init_tensor [1, 2, 3] : tensor<1x2x3xf32>
+  %out = tensor.empty() : tensor<1x2x3xf32>
   %g = linalg.generic {
     indexing_maps = [#map, #map],
     iterator_types = ["parallel", "parallel", "parallel"]
@@ -177,7 +135,7 @@
 
 #map = affine_map<() -> ()>
 func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
-  %out = linalg.init_tensor [] : tensor<f32>
+  %out = tensor.empty() : tensor<f32>
   %g = linalg.generic {
     indexing_maps = [#map, #map],
     iterator_types = []
@@ -200,7 +158,7 @@
   %cst = arith.constant 1.000000e+00 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   cf.br ^bb1(%cst : f32)
 
 ^bb1(%arg1 : f32):
@@ -226,7 +184,7 @@
   %cst = arith.constant 1.000000e+00 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   cf.br ^bb1(%cst : f32)
 
 ^bb1(%arg2 : f32):
@@ -246,33 +204,6 @@
 
 // -----
 
-func.func @fold_init_tensor_with_slice
-  (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
-{
-  %0 = linalg.init_tensor[%arg0, 10, 40] : tensor<?x10x40xf32>
-  %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
-    : tensor<?x10x40xf32> to tensor<5x?x20xf32>
-  return %1 : tensor<5x?x20xf32>
-}
-//      CHECK: func @fold_init_tensor_with_slice
-// CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: index
-// CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
-//      CHECK:   %[[T0:.+]] = linalg.init_tensor [5, %[[ARG1]], 20]
-//      CHECK:   return %[[T0]]
-
-// -----
-
-func.func @fold_init_tensor_with_cast(%arg0 : index) -> tensor<1x12xf32> {
-  %0 = linalg.init_tensor [%arg0, 12] : tensor<?x12xf32>
-  %1 = tensor.cast %0 : tensor<?x12xf32> to tensor<1x12xf32>
-  return %1 : tensor<1x12xf32>
-}
-//      CHECK: func @fold_init_tensor_with_cast(%[[ARG0:.+]]: index)
-//      CHECK:   %[[T0:.+]] = linalg.init_tensor [1, 12] : tensor<1x12xf32>
-//      CHECK:   return %[[T0]] : tensor<1x12xf32>
-
-// -----
-
 #accesses = [
   affine_map<(i, j) -> (i, j)>
 ]
@@ -315,7 +246,7 @@
   %c1 = arith.constant 1 : index
   %c21 = arith.constant 21 : index
   %c42 = arith.constant 42 : index
-  %0 = linalg.init_tensor [%c21, %c42] : tensor<?x?xf32>
+  %0 = tensor.empty(%c21, %c42) : tensor<?x?xf32>
   %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %2 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %3 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
@@ -323,7 +254,7 @@
   return %4 : tensor<?x?xf32>
 }
 // CHECK-LABEL: func @propogate_casts
-//       CHECK:   %[[INIT:.+]] = linalg.init_tensor [21, 42]
+//       CHECK:   %[[INIT:.+]] = tensor.empty
 //       CHECK:   %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
 //       CHECK:   %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
 //       CHECK:   %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
@@ -353,8 +284,8 @@
   %c0 = arith.constant 0 : index
   %cst = arith.constant 7.0 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
-  %1 = linalg.init_tensor [%0] : tensor<?xf32>
-  %2 = linalg.init_tensor [%0] : tensor<?xf32>
+  %1 = tensor.empty(%0) : tensor<?xf32>
+  %2 = tensor.empty(%0) : tensor<?xf32>
   %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %1 : tensor<?xf32>, tensor<?xf32>) outs (%2:tensor<?xf32>) {
   ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
     %4 = arith.addf  %arg1, %cst : f32
@@ -378,9 +309,9 @@
   %cst2 = arith.constant 6.0 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
-  %3 = linalg.init_tensor [%1, %0] : tensor<?x?xf32>
-  %4 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
+  %3 = tensor.empty(%1, %0) : tensor<?x?xf32>
+  %4 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%2, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs (%4:tensor<?x?xf32>) {
   ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
     %6 = arith.divf  %cst1, %cst2 : f32
@@ -393,10 +324,10 @@
 // CHECK-LABEL: func @fold_fill_reshape()
 func.func @fold_fill_reshape() -> tensor<6x4xf32> {
   %zero = arith.constant 0.0 : f32
-  // CHECK: %[[INIT:.+]] = linalg.init_tensor [6, 4] : tensor<6x4xf32>
-  %init = linalg.init_tensor [1, 2, 3, 4] : tensor<1x2x3x4xf32>
+  // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<6x4xf32>
+  %empty = tensor.empty() : tensor<1x2x3x4xf32>
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<6x4xf32>) -> tensor<6x4xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
+  %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
   %reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]]
       : tensor<1x2x3x4xf32> into tensor<6x4xf32>
   // CHECK: return %[[FILL]] : tensor<6x4xf32>
@@ -418,43 +349,6 @@
   return %1 : tensor<?x?xf32>
 }
 
-
-// -----
-
-func.func private @some_use(%i : index, %j : index)
-
-// CHECK-LABEL: func @init_canonicalize
-//  CHECK-SAME:   %[[I:.*]]: index
-func.func @init_canonicalize(%i : index) {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-
-  // CHECK-NOT: init_tensor
-  %0 = linalg.init_tensor [%i, 42] : tensor<?x42xf32>
-
-  // CHECK-NOT: tensor.dim
-  %1 = tensor.dim %0, %c0: tensor<?x42xf32>
-  %2 = tensor.dim %0, %c1: tensor<?x42xf32>
-
-  // CHECK: %[[c42:.*]] = arith.constant 42 : index
-  // CHECK: call @some_use(%[[I]], %[[c42]])
-  call @some_use(%1, %2) : (index, index) -> ()
-
-  return
-}
-
-// -----
-
-// CHECK-LABEL: func @rank_reducing_init_extract
-func.func @rank_reducing_init_extract(%sz : index, %idx : index) -> tensor<2xf32> {
-  // CHECK: linalg.init_tensor [2] : tensor<2xf32>
-  %a = linalg.init_tensor [%sz, 2] : tensor<?x2xf32>
-
-  // CHECK-NOT: extract
-  %r = tensor.extract_slice %a[%idx, 0] [1, 2] [1, 1] : tensor<?x2xf32> to tensor<2xf32>
-  return %r: tensor<2xf32>
-}
-
 // -----
 
 // CHECK: func @fold_self_copy
@@ -475,13 +369,13 @@
 
 // CHECK-LABEL: func @fold_static_pad_fill
 //       CHECK:   %[[F0:.+]] = arith.constant 0.000000e+00 : f32
-//       CHECK:   %[[INIT:.+]] = linalg.init_tensor [412, 276] : tensor<412x276xf32>
+//       CHECK:   %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32>
 //       CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
 //       CHECK:   return %[[FILL]]
 func.func @fold_static_pad_fill() -> tensor<412x276xf32> {
   %f0 = arith.constant 0.0 : f32
-  %init = linalg.init_tensor [400, 273] : tensor<400x273xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<400x273xf32>) -> tensor<400x273xf32>
+  %empty = tensor.empty() : tensor<400x273xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
   %pad = tensor.pad %fill low[4, 1] high[8, 2] {
   ^bb0(%arg1: index, %arg2: index):
     tensor.yield %f0 : f32
@@ -507,12 +401,12 @@
 //      CHECK:   %[[S1:.+]] = affine.apply #[[MAP1]]()[%[[DIM1]]]
 //      CHECK:   %[[S2:.+]] = affine.apply #[[MAP2]]()[%[[HIGH2]]]
 //      CHECK:   %[[S3:.+]] = affine.apply #[[MAP3]]()[%[[LOW3]], %[[HIGH3]]]
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : tensor<?x?x?x?xf32>
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor<?x?x?x?xf32>
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
 //      CHECK:   return %[[FILL]]
-func.func @fold_dynamic_pad_fill(%init: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor<?x?x?x?xf32> {
+func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor<?x?x?x?xf32> {
   %f0 = arith.constant 0.0 : f32
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
   %pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] {
   ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
     tensor.yield %f0 : f32
@@ -526,8 +420,8 @@
 func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
   %f0 = arith.constant 0.0 : f32
   %f1 = arith.constant 1.0 : f32
-  %init = linalg.init_tensor [400, 273] : tensor<400x273xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<400x273xf32>) -> tensor<400x273xf32>
+  %empty = tensor.empty() : tensor<400x273xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
   // CHECK: tensor.pad
   %pad = tensor.pad %fill low[4, 1] high[8, 2] {
   ^bb0(%arg1: index, %arg2: index):
@@ -552,7 +446,7 @@
   %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
   %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
-  %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+  %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
   %4 = linalg.generic {
     indexing_maps = [#map, #map, #map],
     iterator_types = ["parallel", "parallel", "parallel"]
@@ -582,7 +476,7 @@
   %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
   %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
-  %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+  %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
   %4 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
   %5 = linalg.generic {
     indexing_maps = [#map, #map, #map],
@@ -613,7 +507,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor<2x3x4xf32>
   %1 = tensor.dim %arg2, %c1 : tensor<2x3x4xf32>
   %2 = tensor.dim %arg2, %c2 : tensor<2x3x4xf32>
-  %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+  %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
   %4 = tensor.cast %3 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
   %5 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
   %6 = linalg.generic {
@@ -647,7 +541,7 @@
   %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
   %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
-  %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+  %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
   %4 = tensor.cast %arg0 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
   %5 = tensor.cast %arg1 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
   %6 = linalg.generic {
@@ -672,7 +566,7 @@
 // CHECK-LABEL: func @cast_dest
 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
 func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
-  %0 = linalg.init_tensor [%arg2, %arg3, %arg4] : tensor<?x?x?xf32>
+  %0 = tensor.empty(%arg2, %arg3, %arg4) : tensor<?x?x?xf32>
   %1 = tensor.cast %arg1 : tensor<1x?x?xf32> to tensor<?x?x?xf32>
   %2 = linalg.generic {
     indexing_maps = [#map, #map, #map],
@@ -699,7 +593,7 @@
 //   CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
 //   CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
 //   CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
-//       CHECK: %[[INIT:.+]] = linalg.init_tensor [8, 384, 384]
+//       CHECK: %[[INIT:.+]] = tensor.empty()
 //       CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
 //       CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP]]()[%[[LOW1]]]
 //       CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?xf32>
@@ -713,8 +607,8 @@
   ^bb0(%arg3: index, %arg4: index, %arg5: index):
     tensor.yield %f0 : f32
   } : tensor<?x?x?xf32> to tensor<8x128x128xf32>
-  %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+  %empty = tensor.empty() : tensor<8x384x384xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
   %0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   return %0: tensor<8x384x384xf32>
 }
@@ -734,8 +628,8 @@
   ^bb0(%arg3: index, %arg4: index, %arg5: index):
     tensor.yield %f0 : f32
   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
-  %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+  %empty = tensor.empty() : tensor<8x384x384xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
   %0 = tensor.insert_slice %a   into %fill[%offset, 0, 0]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   %1 = tensor.insert_slice %a   into %0   [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   %2 = tensor.insert_slice %pad into %1   [0, 0, 256]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -753,8 +647,8 @@
   ^bb0(%arg3: index, %arg4: index, %arg5: index):
     tensor.yield %f0 : f32
   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
-  %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+  %empty = tensor.empty() : tensor<8x384x384xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
   %0 = tensor.insert_slice %a   into %fill[%offset, 0, 0]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   %1 = tensor.insert_slice %a   into %0   [0, 0, 129]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   // Range overlap with %1 at dim#3
@@ -773,8 +667,8 @@
   ^bb0(%arg3: index, %arg4: index, %arg5: index):
     tensor.yield %f0 : f32
   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
-  %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+  %empty = tensor.empty() : tensor<8x384x384xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
   %0 = tensor.insert_slice %a   into %fill[0, 0, %offset]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   %1 = tensor.insert_slice %a   into %0   [0, 128, 255]    [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   // Range overlap with %0 at dim#3
@@ -793,8 +687,8 @@
   ^bb0(%arg3: index, %arg4: index, %arg5: index):
     tensor.yield %f0 : f32
   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
-  %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
-  %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+  %empty = tensor.empty() : tensor<8x384x384xf32>
+  %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
   // Overlap btween %0 and %1 is fine but not with %2 is fine.
   // CHECK-COUNT-3: tensor.insert_slice
   %0 = tensor.insert_slice %a   into %fill[0, 0, %offset]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -815,9 +709,9 @@
   ^bb0(%arg3: index, %arg4: index, %arg5: index):
     tensor.yield %f0 : f32
   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
-  %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
+  %empty = tensor.empty() : tensor<8x384x384xf32>
   // Different filling value than padding value.
-  %fill = linalg.fill ins(%f1 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+  %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
   %0 = tensor.insert_slice %a   into %fill[%offset, 0, 0]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   %1 = tensor.insert_slice %a   into %0   [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
   %2 = tensor.insert_slice %pad into %1   [0, 0, 256]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -903,14 +797,14 @@
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
   %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
   %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
-  %init1 = linalg.init_tensor [%d1, %d2, %d0] : tensor<?x?x?xf32>
-  %init2 = linalg.init_tensor [%d2, %d1, %d0] : tensor<?x?x?xf32>
+  %empty1 = tensor.empty(%d1, %d2, %d0) : tensor<?x?x?xf32>
+  %empty2 = tensor.empty(%d2, %d1, %d0) : tensor<?x?x?xf32>
   %0:2 = linalg.generic {
       iterator_types = ["parallel", "parallel", "parallel"],
       indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                        affine_map<(d0, d1, d2) -> (d1, d2, d0)>,
                        affine_map<(d0, d1, d2) -> (d2, d1, d0)>]}
-      ins(%arg0 : tensor<?x?x?xf32>) outs(%init1, %init2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
+      ins(%arg0 : tensor<?x?x?xf32>) outs(%empty1, %empty2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
     ^bb0(%b0 : f32, %b1 : f32, %b2 : f32) :
       linalg.yield %b0, %b0 : f32, f32
     } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
@@ -919,9 +813,9 @@
 }
 //       CHECK: func @fold_multi_use_generic_op_with_consumer
 //  CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>
-//   CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
+//   CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<2x3x4xf32>
 //   CHECK-DAG:   %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<4x3x2xf32>
-//   CHECK-DAG:   %[[INIT2:.+]] = linalg.init_tensor [3, 2, 4] : tensor<3x2x4xf32>
+//   CHECK-DAG:   %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
 //       CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
 //  CHECK-SAME:       ins(%[[CAST]] :
 //  CHECK-SAME:       outs(%[[INIT2]], %[[INIT1]] :
diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
--- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
+++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
@@ -75,7 +75,7 @@
 //  CHECK-SAME:   %[[ARG0:[0-9a-zA-Z]*]]: tensor<f32>
 //  CHECK-SAME:   %[[ARG1:[0-9a-zA-Z]*]]: tensor<f32>
 func.func @cmpf(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
-  // CHECK: %[[INIT:.*]] = linalg.init_tensor [] : tensor<i1>
+  // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<i1>
   // CHECK: linalg.generic
   // CHECK-SAME:  ins(%[[ARG0]], %[[ARG1]]
   // CHECK-SAME: outs(%[[INIT]]
@@ -98,7 +98,7 @@
   // CHECK: %[[D2:.*]] = tensor.dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32>
   // CHECK: %[[C5:.*]] = arith.constant 5 : index
   // CHECK: %[[D5:.*]] = tensor.dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32>
-  // CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[D1]], %[[D2]], 8, 2, %[[D5]]] : tensor<4x?x?x8x2x?xi1>
+  // CHECK: %[[INIT:.*]] = tensor.empty(%[[D1]], %[[D2]], %[[D5]]) : tensor<4x?x?x8x2x?xi1>
   // CHECK: linalg.generic
   // CHECK-SAME:  ins(%[[ARG0]], %[[ARG1]]
   // CHECK-SAME: outs(%[[INIT]]
diff --git a/mlir/test/Dialect/Linalg/decompose-ops.mlir b/mlir/test/Dialect/Linalg/decompose-ops.mlir
--- a/mlir/test/Dialect/Linalg/decompose-ops.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-ops.mlir
@@ -7,8 +7,8 @@
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %init1 = linalg.init_tensor [%d1, %d0] : tensor<?x?xf32>
-  %init2 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
+  %init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %result:2 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, 
                      affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
@@ -35,8 +35,8 @@
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 //  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-//  CHECK-DAG:   %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+//  CHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //  CHECK-DAG:   %[[GENERIC1:.+]]:3 = linalg.generic
 // CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]]
 // CHECK-SAME:       ["parallel", "parallel"]
@@ -81,8 +81,8 @@
 //  CANONICALIZECHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CANONICALIZECHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 //  CANONICALIZECHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-//  CANONICALIZECHECK-DAG:   %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+//  CANONICALIZECHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //  CANONICALIZECHECK-DAG:   %[[GENERIC1:.+]] = linalg.generic
 // CANONICALIZECHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
 // CANONICALIZECHECK-SAME:       ["parallel", "parallel"]
@@ -116,8 +116,8 @@
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %init1 = linalg.init_tensor [%d1, %d0] : tensor<?x?xf32>
-  %init2 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
+  %init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %result:3 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, 
                      affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
@@ -144,8 +144,8 @@
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 //  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-//  CHECK-DAG:   %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+//  CHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //  CHECK-DAG:   %[[GENERIC1:.+]]:4 = linalg.generic
 // CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]]
 // CHECK-SAME:       ["parallel", "parallel"]
@@ -189,8 +189,8 @@
 //  CANONICALIZECHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CANONICALIZECHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 //  CANONICALIZECHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-//  CANONICALIZECHECK-DAG:   %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+//  CANONICALIZECHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //  CANONICALIZECHECK-DAG:   %[[GENERIC1:.+]]:2 = linalg.generic
 // CANONICALIZECHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]]
 // CANONICALIZECHECK-SAME:       ["parallel", "parallel"]
@@ -221,7 +221,7 @@
 #map1 = affine_map<(d0, d1) -> (d0)>
 #map2 = affine_map<(d0, d1) -> (d1, d0)>
 func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) -> tensor<20x10xf64> {
-  %init = linalg.init_tensor [20, 10] : tensor<20x10xf64>
+  %init = tensor.empty() : tensor<20x10xf64>
   %0 = linalg.generic {
     indexing_maps = [#map0, #map1, #map2],
     iterator_types = ["parallel", "parallel"]}
@@ -242,8 +242,8 @@
 //      CHECK: func @multi_statement(
 // CHECK-SAME:     %[[ARG0:.+]]: tensor<10x20xf32>
 // CHECK-SAME:     %[[ARG1:.+]]: tensor<10xi32>)
-//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [20, 10] : tensor<20x10xf64>
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [10, 20] : tensor<10x20xf64>
+//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<20x10xf64>
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<10x20xf64>
 //      CHECK:   %[[GENERIC0:.+]]:2 = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]]
 // CHECK-SAME:       iterator_types = ["parallel", "parallel"]
@@ -290,8 +290,8 @@
 //      CANONICALIZECHECK: func @multi_statement(
 // CANONICALIZECHECK-SAME:     %[[ARG0:.+]]: tensor<10x20xf32>
 // CANONICALIZECHECK-SAME:     %[[ARG1:.+]]: tensor<10xi32>)
-//  CANONICALIZECHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [20, 10] : tensor<20x10xf64>
-//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [10, 20] : tensor<10x20xf64>
+//  CANONICALIZECHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<20x10xf64>
+//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<10x20xf64>
 //      CANONICALIZECHECK:   %[[GENERIC0:.+]] = linalg.generic
 // CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
 // CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
@@ -3,7 +3,7 @@
 #map = affine_map<() -> ()>
 
 func.func @detensor_simple(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
-  %0 = linalg.init_tensor [] : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
   %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
     ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
     outs(%0 : tensor<f32>) {
@@ -22,7 +22,7 @@
 // CHECK:         return %[[new_tensor_res]]
 
 func.func @detensor_op_sequence(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
-  %0 = linalg.init_tensor [] : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
   %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
     ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
     outs(%0 : tensor<f32>) {
@@ -31,7 +31,7 @@
     linalg.yield %2 : f32
   } -> tensor<f32>
 
-  %3 = linalg.init_tensor [] : tensor<f32>
+  %3 = tensor.empty() : tensor<f32>
   %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
     ins(%arg1, %1 : tensor<f32>, tensor<f32>)
     outs(%3 : tensor<f32>) {
@@ -40,7 +40,7 @@
     linalg.yield %5 : f32
   } -> tensor<f32>
 
-  %6 = linalg.init_tensor [] : tensor<f32>
+  %6 = tensor.empty() : tensor<f32>
   %7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
     ins(%1, %4 : tensor<f32>, tensor<f32>)
     outs(%6 : tensor<f32>) {
@@ -62,7 +62,7 @@
 // CHECK:         return %[[new_tensor_res]]
 
 func.func @detensor_multiple_ops(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
-  %0 = linalg.init_tensor [] : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
   %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
     ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
     outs(%0 : tensor<f32>) {
@@ -83,7 +83,7 @@
 // CHECK:         return %[[new_tensor_res]]
 
 func.func @detensor_foreign_op(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
-  %0 = linalg.init_tensor [] : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
   %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
     ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
     outs(%0 : tensor<f32>) {
diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
@@ -6,7 +6,7 @@
   %arg1_t = tensor.from_elements %arg1 : tensor<i32>
 
   %cst = arith.constant dense<10> : tensor<i32>
-  %2 = linalg.init_tensor [] : tensor<i8>
+  %2 = tensor.empty() : tensor<i8>
   %3 = linalg.generic
     {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
     ins(%arg0_t : tensor<i1>)
@@ -19,7 +19,7 @@
   %5 = arith.trunci %4 : i8 to i1
   cf.cond_br %5, ^bb1, ^bb2(%arg1_t : tensor<i32>)
 ^bb1:
-  %6 = linalg.init_tensor [] : tensor<i32>
+  %6 = tensor.empty() : tensor<i32>
   %7 = linalg.generic
     {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
     ins(%arg1_t, %cst : tensor<i32>, tensor<i32>)
diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_if.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir
@@ -15,7 +15,7 @@
   cf.br ^bb1(%0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
-  %3 = linalg.init_tensor [] : tensor<i1>
+  %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
     outs(%3 : tensor<i1>) {
@@ -27,7 +27,7 @@
   cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
-  %7 = linalg.init_tensor [] : tensor<i32>
+  %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
     outs(%7 : tensor<i32>) {
@@ -76,7 +76,7 @@
   cf.br ^bb1(%0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
-  %3 = linalg.init_tensor [] : tensor<i1>
+  %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
     outs(%3 : tensor<i1>) {
@@ -88,7 +88,7 @@
   cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
-  %7 = linalg.init_tensor [] : tensor<i32>
+  %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
     outs(%7 : tensor<i32>) {
@@ -139,7 +139,7 @@
   cf.br ^bb1(%0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
-  %3 = linalg.init_tensor [] : tensor<i1>
+  %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
     outs(%3 : tensor<i1>) {
@@ -156,7 +156,7 @@
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
   %12 = tensor.from_elements %c10 : tensor<i32>
-  %7 = linalg.init_tensor [] : tensor<i32>
+  %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %12 : tensor<i32>, tensor<i32>)
     outs(%7 : tensor<i32>) {
diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
@@ -12,7 +12,7 @@
 func.func @main(%farg0 : tensor<i32>) -> (tensor<i1>) attributes {} {
   %c10 = arith.constant 10 : i32
   %1 = tensor.from_elements %c10 : tensor<i32>
-  %3 = linalg.init_tensor [] : tensor<i1>
+  %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%farg0, %1 : tensor<i32>, tensor<i32>)
     outs(%3 : tensor<i1>) {
@@ -34,7 +34,7 @@
 
 // DET-CF-LABEL: func @main(%{{.*}}: tensor<i32>)
 // DET-CF-NEXT:    arith.constant dense<10> : tensor<i32>
-// DET-CF-NEXT:    linalg.init_tensor [] : tensor<i1>
+// DET-CF-NEXT:    tensor.empty() : tensor<i1>
 // DET-CF-NEXT:    linalg.generic
 // DET-CF-NEXT:    ^{{.*}}(%{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i1)
 // DET-CF-NEXT:      arith.cmpi slt, %{{.*}}, %{{.*}}
diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir
@@ -12,7 +12,7 @@
   cf.br ^bb1(%farg0 : tensor<i32>)
 
 ^bb1(%0: tensor<i32>):  // 2 preds: ^bb0, ^bb2
-  %1 = linalg.init_tensor [] : tensor<i1>
+  %1 = tensor.empty() : tensor<i1>
   %2 = linalg.generic #attrs
     ins(%0, %farg1 : tensor<i32>, tensor<i32>)
     outs(%1 : tensor<i1>) {
@@ -24,7 +24,7 @@
   cf.cond_br %3, ^bb2(%0 : tensor<i32>), ^bb3(%0 : tensor<i32>)
 
 ^bb2(%4: tensor<i32>):  // pred: ^bb1
-  %5 = linalg.init_tensor [] : tensor<i32>
+  %5 = tensor.empty() : tensor<i32>
   %6 = linalg.generic #attrs
     ins(%4, %4 : tensor<i32>, tensor<i32>)
     outs(%5 : tensor<i32>) {
diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
@@ -25,7 +25,7 @@
   cf.br ^bb1(%farg0 : tensor<10xi32>)
 
 ^bb1(%0: tensor<10xi32>):  // 2 preds: ^bb0, ^bb2
-  %1 = linalg.init_tensor [] : tensor<i32>
+  %1 = tensor.empty() : tensor<i32>
   %2 = linalg.generic #sum_reduction_attrs
     ins(%0: tensor<10xi32>)
     outs(%1: tensor<i32>) {
@@ -34,7 +34,7 @@
         linalg.yield %b : i32
   } -> tensor<i32>
 
-  %3 = linalg.init_tensor [] : tensor<i1>
+  %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %farg1 : tensor<i32>, tensor<i32>)
     outs(%3 : tensor<i1>) {
@@ -46,7 +46,7 @@
   cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
-  %7 = linalg.init_tensor [10] : tensor<10xi32>
+  %7 = tensor.empty() : tensor<10xi32>
   %9 = linalg.generic #broadcast_attrs
        ins(%6: tensor<i32>)
       outs(%7: tensor<10xi32>) {
@@ -66,7 +66,7 @@
 // DET-ALL-SAME:    (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
 // DET-ALL:         cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-ALL:       ^[[bb1]](%{{.*}}: tensor<10xi32>)
-// DET-ALL:         linalg.init_tensor [] : tensor<i32>
+// DET-ALL:         tensor.empty() : tensor<i32>
 // DET-ALL:         linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
 // DET-ALL:         ^bb0(%{{.*}}: i32, %{{.*}}: i32):  
 // DET-ALL:           %{{.*}} = arith.addi %{{.*}}, %{{.*}}
@@ -77,7 +77,7 @@
 // DET-ALL:         cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
 // DET-ALL:       ^[[bb2]](%{{.*}}: i32)
 // DET-ALL:         tensor.from_elements %{{.*}} : tensor<i32>
-// DET-ALL:         linalg.init_tensor [10] : tensor<10xi32>
+// DET-ALL:         tensor.empty() : tensor<10xi32>
 // DET-ALL:         linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
 // DET-ALL:         ^bb0(%{{.*}}: i32, %{{.*}}: i32):
 // DET-ALL:           linalg.yield %{{.*}} : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
@@ -17,7 +17,7 @@
   cf.br ^bb1(%reshaped0 : tensor<i32>)
 
 ^bb1(%2: tensor<i32>):  // 2 preds: ^bb0, ^bb2
-  %3 = linalg.init_tensor [] : tensor<i1>
+  %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %reshaped1 : tensor<i32>, tensor<i32>)
     outs(%3 : tensor<i1>) {
@@ -29,7 +29,7 @@
   cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3
 
 ^bb2(%6: tensor<i32>):  // pred: ^bb1
-  %7 = linalg.init_tensor [] : tensor<i32>
+  %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
     outs(%7 : tensor<i32>) {
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -246,7 +246,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d2)>
 func.func @fold_unit_dim_tensor_reshape_op(%arg0 : tensor<5xf32>) -> tensor<2x5xf32>
 {
-  %1 = linalg.init_tensor [1, 2, 5] : tensor<1x2x5xf32>
+  %1 = tensor.empty() : tensor<1x2x5xf32>
   %2 = linalg.generic {i64, indexing_maps = [#map1, #map0],
     iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) {
@@ -263,9 +263,9 @@
 
 // -----
 
-func.func @fold_unit_dim_for_init_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> {
+func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> {
   %cst = arith.constant 0.0 : f32
-  %init = linalg.init_tensor [1] : tensor<1xf32>
+  %init = tensor.empty() : tensor<1xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32>
   %add = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
@@ -282,11 +282,11 @@
 //   CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)>
 //   CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ()>
 
-//       CHECK: func @fold_unit_dim_for_init_tensor
+//       CHECK: func @fold_unit_dim_for_empty_tensor
 
 
 //       CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32>
-//       CHECK: %[[INIT:.+]] = linalg.init_tensor [] : tensor<f32>
+//       CHECK: %[[INIT:.+]] = tensor.empty() : tensor<f32>
 //       CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<f32>) -> tensor<f32>
 //       CHECK: %[[GENERIC:.+]] = linalg.generic
 //  CHECK-SAME:     indexing_maps = [#[[MAP1]], #[[MAP2]]]
@@ -330,7 +330,7 @@
   %cst = arith.constant 1.000000e+00 : f32
   %c3 = arith.constant 3 : index
   %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32>
-  %1 = linalg.init_tensor [1, %0] : tensor<1x?xf32>
+  %1 = tensor.empty(%0) : tensor<1x?xf32>
   %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32>
   %3 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
@@ -349,7 +349,7 @@
 //      CHECK: func @unit_dim_for_reduction
 // CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x1x?xf32>
 //  CHECK-DAG:   %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xf32>
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
 //      CHECK:   %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP2]], #[[MAP3]]]
@@ -364,7 +364,7 @@
 func.func @unit_dim_for_both_reduction(%arg0: tensor<1x?x1x1xf32>) -> tensor<1x1xf32> {
   %cst = arith.constant 1.000000e+00 : f32
   %c3 = arith.constant 3 : index
-  %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
+  %1 = tensor.empty() : tensor<1x1xf32>
   %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32>
   %3 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
@@ -382,7 +382,7 @@
 //      CHECK: func @unit_dim_for_both_reduction
 // CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x1x1xf32>
 //  CHECK-DAG:   %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3]
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [1] : tensor<1xf32>
+//      CHECK:   %[[INIT:.+]] = tensor.empty() : tensor<1xf32>
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
 //      CHECK:   %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP2]], #[[MAP2]]]
@@ -398,7 +398,7 @@
   %cst = arith.constant 1.000000e+00 : f32
   %c2 = arith.constant 2 : index
   %0 = tensor.dim %arg0, %c2 : tensor<?x1x?x1xf32>
-  %1 = linalg.init_tensor [%0, 1] : tensor<?x1xf32>
+  %1 = tensor.empty(%0) : tensor<?x1xf32>
   %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?x1xf32>) -> tensor<?x1xf32>
   %3 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
@@ -417,7 +417,7 @@
 //      CHECK: func @unit_dim_for_reduction_inner
 // CHECK-SAME:   %[[ARG0:.+]]: tensor<?x1x?x1xf32>
 //  CHECK-DAG:   %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]]
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xf32>
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
 //      CHECK:   %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP2]], #[[MAP3]]]
@@ -809,7 +809,7 @@
 #CSR = #sparse_tensor.encoding<{ dimLevelType = ["dense", "compressed"] }>
 
 func.func @sparse_case(%arg0: tensor<8x8xf32, #CSR>, %arg1: tensor<8xf32>) -> tensor<8xf32> {
-    %0 = linalg.init_tensor [8] : tensor<8xf32>
+    %0 = tensor.empty() : tensor<8xf32>
     %1 = linalg.generic #matvec
       ins(%arg0, %arg1: tensor<8x8xf32, #CSR>, tensor<8xf32>)
       outs(%0: tensor<8xf32>) {
@@ -822,7 +822,7 @@
 }
 
 // CHECK-LABEL: func @sparse_case
-//  CHECK-NEXT:   linalg.init_tensor
+//  CHECK-NEXT:   tensor.empty
 //  CHECK-NEXT:   linalg.generic
 
 // -----
@@ -831,9 +831,9 @@
   %c2 = arith.constant 2 : index
   %c4 = arith.constant 4 : index
   %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.init_tensor [4, 2] : tensor<4x2xf32>
+  %0 = tensor.empty() : tensor<4x2xf32>
   %res = scf.foreach_thread (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) {
-    %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
+    %1 = tensor.empty() : tensor<1x1xf32>
     %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32>
     scf.foreach_thread.perform_concurrently {
       //      CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}}
diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
--- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
+++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
@@ -11,7 +11,7 @@
     %arg1 : tensor<2x3x4xi32>, %arg2 : tensor<5x6x7x8xi32>) -> tensor<2x3x4x5x6x7x8x9xi32> {
   %expand = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6], [7]]
       : tensor<2x12x5x336x9xi32> into tensor<2x3x4x5x6x7x8x9xi32>
-  %init = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9] : tensor<2x3x4x5x6x7x8x9xi32>
+  %init = tensor.empty() : tensor<2x3x4x5x6x7x8x9xi32>
   %generic = linalg.generic {
     indexing_maps = [#map0, #map1, #map2, #map3],
     iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
@@ -31,7 +31,7 @@
 // CHECK-SAME:   %[[ARG0:.+]]: tensor<2x12x5x336x9xi32>
 // CHECK-SAME:   %[[ARG1:.+]]: tensor<2x3x4xi32>
 // CHECK-SAME:   %[[ARG2:.+]]: tensor<5x6x7x8xi32>
-//  CHECK-DAG:   %[[INIT:.+]] = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9]
+//  CHECK-DAG:   %[[INIT:.+]] = tensor.empty()
 //  CHECK-DAG:   %[[ARG1_RESHAPE:.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0], [1, 2]{{\]}}
 //  CHECK-DAG:   %[[ARG2_RESHAPE:.+]] = tensor.collapse_shape %[[ARG2]] {{\[}}[0], [1, 2, 3]{{\]}}
 //  CHECK-DAG:   %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}}
@@ -62,7 +62,7 @@
     %arg1 : tensor<2x3x4xi32>, %arg2 : tensor<5x6x7x8xi32>) -> tensor<2x3x4x5x6x7x8x9xi32> {
   %expand = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6], [7]]
       : tensor<2x12x5x336x9xi32> into tensor<2x3x4x5x6x7x8x9xi32>
-  %init = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9] : tensor<2x3x4x5x6x7x8x9xi32>
+  %init = tensor.empty() : tensor<2x3x4x5x6x7x8x9xi32>
   %generic = linalg.generic {
     indexing_maps = [#map0, #map1, #map2, #map3],
     iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
@@ -124,7 +124,7 @@
     %arg1 : tensor<7x8x2xi32>, %arg2 : tensor<6x3x4x5xi32>) -> tensor<2x3x4x5x6x7x8x9xi32> {
   %expand = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6], [7]]
       : tensor<9x56x2x60x6xi32> into tensor<9x7x8x2x3x4x5x6xi32>
-  %init = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9] : tensor<2x3x4x5x6x7x8x9xi32>
+  %init = tensor.empty() : tensor<2x3x4x5x6x7x8x9xi32>
   %generic = linalg.generic {
     indexing_maps = [#map0, #map1, #map2, #map3],
     iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
@@ -145,7 +145,7 @@
 // CHECK-SAME:   %[[ARG0:.+]]: tensor<9x56x2x60x6xi32>
 // CHECK-SAME:   %[[ARG1:.+]]: tensor<7x8x2xi32>
 // CHECK-SAME:   %[[ARG2:.+]]: tensor<6x3x4x5xi32>
-//  CHECK-DAG:   %[[INIT:.+]] = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9]
+//  CHECK-DAG:   %[[INIT:.+]] = tensor.empty()
 //  CHECK-DAG:   %[[ARG1_RESHAPE:.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0, 1], [2]{{\]}}
 //  CHECK-DAG:   %[[ARG2_RESHAPE:.+]] = tensor.collapse_shape %[[ARG2]] {{\[}}[0], [1, 2, 3]{{\]}}
 //  CHECK-DAG:   %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}}
@@ -176,7 +176,7 @@
   %d4 = tensor.dim %arg2, %c0 : tensor<?x?x?x?xi32>
   %d6 = tensor.dim %arg1, %c1 : tensor<?x?x?xi32>
   %d7 = tensor.dim %arg0, %c0 : tensor<?x?x?x?x?xi32>
-  %init = linalg.init_tensor [%d0, 3, %d2, 5, %d4, 7, %d6, %d7] : tensor<?x3x?x5x?x7x?x?xi32>
+  %init = tensor.empty(%d0, %d2, %d4, %d6, %d7) : tensor<?x3x?x5x?x7x?x?xi32>
   %generic = linalg.generic {
     indexing_maps = [#map0, #map1, #map2, #map3],
     iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
@@ -254,7 +254,7 @@
 #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
 func.func @no_fuse_unpreserved_folding(%arg0 : tensor<2x12x5xf32>, %arg1 : tensor<2x3xf32>) -> tensor<2x3x4x5xf32> {
   %0 = tensor.expand_shape %arg0 [[0], [1, 2], [3]] : tensor<2x12x5xf32> into tensor<2x3x4x5xf32>
-  %init = linalg.init_tensor [2, 3, 4, 5] : tensor<2x3x4x5xf32>
+  %init = tensor.empty(): tensor<2x3x4x5xf32>
   %1 = linalg.generic {
       indexing_maps = [#map0, #map1, #map0],
       iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
@@ -281,7 +281,7 @@
 #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>
 func.func @no_fuse_unpreserved_folding_transpose(%arg0 : tensor<2x12x5xf32>, %arg1 : tensor<2xf32>) -> tensor<2x4x3x5xf32> {
   %0 = tensor.expand_shape %arg0 [[0], [1, 2], [3]] : tensor<2x12x5xf32> into tensor<2x3x4x5xf32>
-  %init = linalg.init_tensor [2, 4, 3, 5] : tensor<2x4x3x5xf32>
+  %init = tensor.empty() : tensor<2x4x3x5xf32>
   %1 = linalg.generic {
       indexing_maps = [#map0, #map1, #map2],
       iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
@@ -308,7 +308,7 @@
 #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d3)>
 func.func @no_fuse_mismatched_iterator_types(%arg0 : tensor<2x12x5xf32>, %arg1 : tensor<2x3xf32>) -> tensor<2x5xf32> {
   %0 = tensor.expand_shape %arg0 [[0], [1, 2], [3]] : tensor<2x12x5xf32> into tensor<2x3x4x5xf32>
-  %init = linalg.init_tensor [2, 5] : tensor<2x5xf32>
+  %init = tensor.empty() : tensor<2x5xf32>
   %1 = linalg.generic {
       indexing_maps = [#map0, #map1, #map2],
       iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
@@ -337,7 +337,7 @@
 func.func @control_fusion(%arg0 : tensor<6xf32>, %arg1 : tensor<20xf32>) -> tensor<2x3x4x5xf32> {
   %0 = tensor.expand_shape %arg0 [[0, 1]] : tensor<6xf32> into tensor<2x3xf32>
   %1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<20xf32> into tensor<4x5xf32>
-    %init = linalg.init_tensor [2, 3, 4, 5] : tensor<2x3x4x5xf32>
+    %init = tensor.empty() : tensor<2x3x4x5xf32>
   %2 = linalg.generic {
       indexing_maps = [#map0, #map1, #map2],
       iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
@@ -370,7 +370,7 @@
 // CONTROL-SAME:     %[[ARG0:.+]]: tensor<6xf32>
 // CONTROL-SAME:     %[[ARG1:.+]]: tensor<20xf32>
 //      CONTROL:     %[[EXPAND:.+]] = tensor.expand_shape %[[ARG0]]
-//      CONTROL:     %[[INIT:.+]] = linalg.init_tensor [2, 3, 4, 5]
+//      CONTROL:     %[[INIT:.+]] = tensor.empty()
 //      CONTROL:     %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]{{\]}}
 //      CONTROL:     %[[GENERIC:.+]] = linalg.generic
 // CONTROL-SAME:         ins(%[[EXPAND]], %[[ARG1]] :
@@ -383,7 +383,7 @@
 #map = affine_map<(d0) -> (d0)>
 func.func @zero_D_test(%arg0: tensor<f32>) -> tensor<1xf32> {
   %0 = tensor.expand_shape %arg0 [] : tensor<f32> into tensor<1xf32>
-  %init = linalg.init_tensor [1] : tensor<1xf32>
+  %init = tensor.empty() : tensor<1xf32>
   %1 = linalg.generic {
       indexing_maps = [#map, #map],
       iterator_types = ["parallel"]}
@@ -444,7 +444,7 @@
   %0 = tensor.expand_shape %arg0 [[0, 1], [2, 3]] : tensor<?x?xi32> into tensor<?x4x?x8xi32>
   %d0 = tensor.dim %0, %c0 : tensor<?x4x?x8xi32>
   %d1 = tensor.dim %0, %c2 : tensor<?x4x?x8xi32>
-  %init = linalg.init_tensor [%d1, 8, %d0, 4] : tensor<?x8x?x4xi32>
+  %init = tensor.empty(%d1, %d0) : tensor<?x8x?x4xi32>
   %1 = linalg.generic {
       indexing_maps = [#map0, #map1],
       iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
@@ -468,7 +468,7 @@
 // CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?xi32>)
 //  CHECK-DAG:   %[[C4:.+]] = arith.constant 4 : index
 //  CHECK-DAG:   %[[C8:.+]] = arith.constant 8 : index
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor
+//      CHECK:   %[[INIT:.+]] = tensor.empty
 //      CHECK:   %[[COLLAPSE_INIT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2, 3]{{\]}}
 //      CHECK:   %[[GENERIC:.+]] = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
@@ -500,7 +500,7 @@
   %c0 = arith.constant 0 : index
   %c2 = arith.constant 2 : index
   %0 = tensor.expand_shape %arg0 [[0, 1], [2, 3]] : tensor<?x?xi32> into tensor<?x4x?x8xi32>
-  %init = linalg.init_tensor [] : tensor<i32>
+  %init = tensor.empty() : tensor<i32>
   %1 = linalg.generic {
       indexing_maps = [#map0, #map1],
       iterator_types = ["reduction", "reduction", "reduction", "reduction"]}
diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
--- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
@@ -10,7 +10,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
       outs(%2 : tensor<?x?xf32>) {
@@ -52,7 +52,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor<?x?xf32>, f32)
       outs(%2 : tensor<?x?xf32>) {
@@ -94,7 +94,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
       outs(%2 : tensor<?x?xf32>) {
@@ -128,7 +128,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
       outs(%2 : tensor<?x?xf32>) {
@@ -162,7 +162,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
-  %1 = linalg.init_tensor [%0] : tensor<?xf32>
+  %1 = tensor.empty(%0) : tensor<?xf32>
   %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]}
       ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
       outs(%1 : tensor<?xf32>) {
@@ -173,7 +173,7 @@
   // CHECK: linalg.generic {
   // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP1]], [[$MAP0]], [[$MAP0]]
   %3 = tensor.dim %arg2, %c1 : tensor<?x?xf32>
-  %4 = linalg.init_tensor [%0, %3] : tensor<?x?xf32>
+  %4 = tensor.empty(%0, %3) : tensor<?x?xf32>
   %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
       ins(%2, %arg2 : tensor<?xf32>, tensor<?x?xf32>)
       outs(%4 : tensor<?x?xf32>){
@@ -192,7 +192,7 @@
 // CHECK-LABEL: @add_mul_scalar_fusion
 func.func @add_mul_scalar_fusion(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32>
 {
-  %0 = linalg.init_tensor [] : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
   %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []}
       ins(%arg0, %arg1 : tensor<f32>, tensor<f32>)
       outs(%0 : tensor<f32>) {
@@ -226,7 +226,7 @@
   %cst = arith.constant dense<42.0> : tensor<5xf32>
   %0 = tensor.dim %arg0, %c1 : tensor<5x?x?xf32>
   %1 = tensor.dim %arg0, %c2 : tensor<5x?x?xf32>
-  %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<5x?x?xf32>
   %3 = linalg.generic {
     indexing_maps = [#map0, #map1, #map1],
     iterator_types = ["parallel", "parallel", "parallel"]}
@@ -258,7 +258,7 @@
   %cst = arith.constant dense<42.0> : tensor<f32>
   %0 = tensor.dim %arg0, %c1 : tensor<5x?x?xf32>
   %1 = tensor.dim %arg0, %c2 : tensor<5x?x?xf32>
-  %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<5x?x?xf32>
   %3 = linalg.generic {
     indexing_maps = [#map0, #map1, #map1],
     iterator_types = ["parallel", "parallel", "parallel"]}
@@ -286,7 +286,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xi32>
   %3 = linalg.generic {
     indexing_maps = [#map0, #map0, #map0],
     iterator_types = ["parallel", "parallel"] }
@@ -337,7 +337,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xi32>
   %3 = linalg.generic {
     indexing_maps = [#map0, #map0],
     iterator_types = ["parallel", "parallel"] }
@@ -391,7 +391,7 @@
   %c1 = arith.constant 1 : index
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xi32>
   %3 = linalg.generic {
     indexing_maps = [#map0, #map0],
     iterator_types = ["parallel", "parallel"] }
@@ -453,7 +453,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?xi32>
-  %0 = linalg.init_tensor [%d0] : tensor<?xi32>
+  %0 = tensor.empty(%d0) : tensor<?xi32>
   %1 = linalg.generic
       {indexing_maps = [#map1, #map1],
        iterator_types = ["parallel"]}
@@ -466,7 +466,7 @@
       } -> tensor<?xi32>
   %2 = tensor.dim %arg1, %c0 : tensor<?x?xi32>
   %3 = tensor.dim %arg1, %c1 : tensor<?x?xi32>
-  %4 = linalg.init_tensor [%2, %3] : tensor<?x?xi32>
+  %4 = tensor.empty(%2, %3) : tensor<?x?xi32>
   %5 = linalg.generic
       {indexing_maps = [#map2, #map3, #map2],
        iterator_types = ["parallel", "parallel"]}
@@ -499,7 +499,7 @@
 {
   %c0 = arith.constant 0 : index
   %cst = arith.constant dense<1.000000e+00> : tensor<10xf32>
-  %0 = linalg.init_tensor [] : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
   %1 = linalg.generic
     {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>],
      iterator_types = []}
@@ -509,7 +509,7 @@
       %4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32>
       linalg.yield %4 : f32
     } -> tensor<f32>
-  %2 = linalg.init_tensor [10] : tensor<10xf32>
+  %2 = tensor.empty() : tensor<10xf32>
   %3 = linalg.generic
    {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>,
                      affine_map<(d0) -> (d0)>],
@@ -538,7 +538,7 @@
 
 func.func @constant_fusion(%arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
   %cst = arith.constant dense<1.0> : tensor<4xf32>
-  %1 = linalg.init_tensor [4] : tensor<4xf32>
+  %1 = tensor.empty() : tensor<4xf32>
   %2 = linalg.generic
     {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>,
                       affine_map<(d0) -> (d0)>],
@@ -555,7 +555,7 @@
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<(d0) -> (d0)>
 //      CHECK: func @constant_fusion(%[[ARG0:.+]]: tensor<4xf32>)
 //  CHECK-DAG:   %[[CST:.+]] = arith.constant 1.000000e+00 : f32
-//  CHECK-DAG:   %[[T0:.+]] = linalg.init_tensor [4] : tensor<4xf32>
+//  CHECK-DAG:   %[[T0:.+]] = tensor.empty() : tensor<4xf32>
 //      CHECK:   %[[T1:.+]] = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP]], #[[MAP]]]
 // CHECK-SAME:     ins(%[[ARG0]] : tensor<4xf32>)
@@ -574,7 +574,7 @@
 func.func @consumer_with_reduction(%arg0: tensor<1x10xf32>,
                               %arg1: tensor<1x10xf32>,
                               %arg2: tensor<1xf32>) -> tensor<1xf32> {
-  %init = linalg.init_tensor [1, 10] : tensor<1x10xf32>
+  %init = tensor.empty() : tensor<1x10xf32>
   %0 = linalg.generic
     {indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
@@ -620,7 +620,7 @@
   %shape = shape.shape_of %0 : tensor<?x1xf32> -> tensor<?xindex>
   %extend = shape.to_extent_tensor %shape : tensor<?xindex> -> tensor<2xindex>
   %extracted = tensor.extract %extend[%c0] : tensor<2xindex>
-  %init0 = linalg.init_tensor [%extracted, 1] : tensor<?x1xf32>
+  %init0 = tensor.empty(%extracted) : tensor<?x1xf32>
   %1 = linalg.generic {indexing_maps = [
     affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
@@ -630,7 +630,7 @@
       linalg.yield %cp5 : f32
   } -> tensor<?x1xf32>
   %d0 = tensor.dim %0, %c0 : tensor<?x1xf32>
-  %init1 = linalg.init_tensor [%d0, 1] : tensor<?x1xf32>
+  %init1 = tensor.empty(%d0) : tensor<?x1xf32>
   %2 = linalg.generic {indexing_maps = [
     affine_map<(d0, d1) -> (d0, d1)>,
     affine_map<(d0, d1) -> (d0, d1)>,
@@ -692,7 +692,7 @@
   // CHECK-SAME:   ins(%[[CONST]] : tensor<3x2xf32>)
   //      CHECK: return %[[RESULT]]
   %three = arith.constant dense<3.0> : tensor<3x2xf32>
-  %init = linalg.init_tensor [3] : tensor<3xf32>
+  %init = tensor.empty() : tensor<3xf32>
   %result = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                        affine_map<(d0, d1) -> (d0)>],
@@ -732,12 +732,12 @@
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 //  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-//  CHECK-DAG:   %[[INIT:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CHECK-DAG:   %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //      CHECK:   %[[GENERIC1:.+]] = linalg.generic
 // CHECK-SAME:     outs(%[[INIT]] : tensor<?x?xf32>)
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]]
 //  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]]
-//  CHECK-DAG:   %[[INIT:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CHECK-DAG:   %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //      CHECK:   %[[RESULT:.+]] = linalg.generic
 // CHECK-SAME:     outs(%[[INIT]] : tensor<?x?xf32>)
 
@@ -750,8 +750,8 @@
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %0 = linalg.init_tensor[%d0, %d1] : tensor<?x?xf32>
-  %1 = linalg.init_tensor[%d0, %d1] : tensor<?x?xi32>
+  %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
+  %1 = tensor.empty(%d0, %d1) : tensor<?x?xi32>
   %2:2 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                        affine_map<(d0, d1) -> ()>,
@@ -925,7 +925,7 @@
 // CHECK: linalg.generic
 func.func @no_fusion_missing_reduction_shape(%arg0: tensor<f32>, %arg1: index) -> tensor<?xf32> {
   %cst = arith.constant 0xFF800000 : f32
-  %4 = linalg.init_tensor [%arg1, %arg1] : tensor<?x?xf32>
+  %4 = tensor.empty(%arg1, %arg1) : tensor<?x?xf32>
   %5 = linalg.generic {
     indexing_maps = [#map0, #map1],
     iterator_types = ["parallel", "parallel"]
@@ -933,7 +933,7 @@
   ^bb0(%arg2: f32, %arg3: f32):  
     linalg.yield %arg2 : f32
   } -> tensor<?x?xf32>
-  %6 = linalg.init_tensor [%arg1] : tensor<?xf32>
+  %6 = tensor.empty(%arg1) : tensor<?xf32>
   %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<?xf32>) -> tensor<?xf32>
   %8 = linalg.generic {
     indexing_maps = [#map2, #map3],
@@ -959,7 +959,7 @@
           %23 = arith.index_cast %22 : index to i64
           linalg.yield %23 : i64
         } -> tensor<5000xi64>
-  %1 = linalg.init_tensor [5000] : tensor<5000xi32>
+  %1 = tensor.empty() : tensor<5000xi32>
   %2 = linalg.generic {
         indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>],
         iterator_types = ["parallel", "parallel"]}
@@ -976,8 +976,8 @@
 //      CHECK: func @fusion_different_axes(
 // CHECK-SAME:     %[[ARG0:.+]]: tensor<5000xi64>
 // CHECK-SAME:     %[[ARG1:.+]]: tensor<5000xi32>
-//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [5000] : tensor<5000xi64>
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [5000] : tensor<5000xi32>
+//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<5000xi64>
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<5000xi32>
 //      CHECK:   %[[RESULT:.+]]:2 = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
 // CHECK-SAME:       outs(%[[INIT0]], %[[INIT1]] :
@@ -1004,9 +1004,9 @@
   %c0 = arith.constant 0 : index
   %cst = arith.constant 7.0 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
-  %1 = linalg.init_tensor [%0] : tensor<?xf32>
+  %1 = tensor.empty(%0) : tensor<?xf32>
   %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?xf32>) -> tensor<?xf32>
-  %3 = linalg.init_tensor [%0] : tensor<?xf32>
+  %3 = tensor.empty(%0) : tensor<?xf32>
   %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor<?xf32>, tensor<?xf32>) outs (%3:tensor<?xf32>) {
   ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
     %5 = arith.addf  %arg1, %arg2 : f32
@@ -1031,11 +1031,11 @@
   %cst2 = arith.constant 6.0 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  %4 = linalg.init_tensor [%1, %0] : tensor<?x?xf32>
+  %4 = tensor.empty(%1, %0) : tensor<?x?xf32>
   %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  %6 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %6 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor<?x?xf32>, tensor<?x?xf32>) outs (%6:tensor<?x?xf32>) {
   ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
     %8 = arith.divf  %arg1, %arg2 : f32
@@ -1049,8 +1049,8 @@
 #map = affine_map<() -> ()>
 module {
   func.func @fuse_multi_result_producer(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<f32>, %arg3: tensor<f32>, %arg4: tensor<f32>) -> tensor<f32> {
-    %0 = linalg.init_tensor [] : tensor<f32>
-    %1 = linalg.init_tensor [] : tensor<f32>
+    %0 = tensor.empty() : tensor<f32>
+    %1 = tensor.empty() : tensor<f32>
     %2:2 = linalg.generic {
       indexing_maps = [#map, #map, #map, #map, #map], iterator_types = []}
       ins(%arg0, %arg1, %arg1 : tensor<f32>, tensor<f32>, tensor<f32>) outs(%0, %1 : tensor<f32>, tensor<f32>) {
@@ -1073,7 +1073,7 @@
 // CHECK-LABEL: func.func @fuse_multi_result_producer
 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<f32>
 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<f32>
-//       CHECK:   %[[INIT:.+]] = linalg.init_tensor
+//       CHECK:   %[[INIT:.+]] = tensor.empty
 //       CHECK:   %[[GENERIC:.+]] = linalg.generic
 //  CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
 //  CHECK-SAME:       outs(%[[INIT]] :
diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir
--- a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir
@@ -18,7 +18,7 @@
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %0 = linalg.generic #binary2Dpointwise
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
       outs(%init : tensor<?x?xf32>) {
diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
--- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
@@ -34,7 +34,7 @@
 
 // CHECK-LABEL: func @reshape_multiple
 // CHECK-SAME: (%[[A:.*]]: tensor<12544x16xf32>, %[[B:.*]]: tensor<12544x16xf32>, %[[C:.*]]: tensor<16xf32>)
-//      CHECK: %[[I:.*]] = linalg.init_tensor [112, 112, 16] : tensor<112x112x16xf32>
+//      CHECK: %[[I:.*]] = tensor.empty() : tensor<112x112x16xf32>
 //      CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32>
 //      CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]],
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
@@ -47,7 +47,7 @@
       : tensor<12544x16xf32> into tensor<112x112x16xf32>
   %1 = tensor.expand_shape %B [[0, 1], [2]]
       : tensor<12544x16xf32> into tensor<112x112x16xf32>
-  %2 = linalg.init_tensor [112, 112, 16] : tensor<112x112x16xf32>
+  %2 = tensor.empty() : tensor<112x112x16xf32>
   %3 = linalg.generic {indexing_maps = [
     affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
     affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
@@ -75,7 +75,7 @@
 func.func @reshape_negative(%A: tensor<12544x16xf32>, %B: tensor<112xf32>) -> tensor<112x112x16xf32> {
   %20 = tensor.expand_shape %A [[0, 1], [2]]
       : tensor<12544x16xf32> into tensor<112x112x16xf32>
-  %21 = linalg.init_tensor [112, 112, 16] : tensor<112x112x16xf32>
+  %21 = tensor.empty() : tensor<112x112x16xf32>
   %22 = linalg.generic {indexing_maps = [
     affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d1)>,
     affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
@@ -98,7 +98,7 @@
   %cst_8 = arith.constant 1.1920929E-7 : f32
   %25 = tensor.expand_shape %arg0 [[0, 1], [2]]
       : tensor<6x5xi32> into tensor<2x3x5xi32>
-  %26 = linalg.init_tensor [2, 3, 5] : tensor<2x3x5xf32>
+  %26 = tensor.empty() : tensor<2x3x5xf32>
   %28 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                        affine_map<(d0, d1, d2) -> (d2)>,
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL:   func @generalize_pad_tensor_static_shape(
 // CHECK-SAME:                                             %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
 // CHECK:           %[[C0:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK:           %[[INIT:.*]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
+// CHECK:           %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
 // CHECK:           %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
 // CHECK:           %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
 // CHECK:           return %[[PADDED]] : tensor<1x32x32x1xf32>
@@ -28,7 +28,7 @@
 // CHECK:           %[[OUT_DIM2:.*]] = arith.addi %[[OFFSET]], %[[C2]] : index
 // CHECK:           %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
 // CHECK:           %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
-// CHECK:           %[[INIT:.*]] = linalg.init_tensor [4, %[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]] : tensor<4x?x?x?xf32>
+// CHECK:           %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
 // CHECK:           %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
 // CHECK:           %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32>
 // CHECK:           %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
diff --git a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
--- a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
+++ b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
@@ -6,7 +6,7 @@
 
 // CHECK: func @inline_zerod(%[[ARG:.*]]: tensor<4xf32>, %[[SCALAR:.*]]: tensor<f32>)
 func.func @inline_zerod(%arg0: tensor<4xf32>, %scalar: tensor<f32>) -> tensor<4xf32> {
-    %0 = linalg.init_tensor [4] : tensor<4xf32>
+    %0 = tensor.empty() : tensor<4xf32>
     // CHECK: linalg.generic {indexing_maps = [#[[MAP]], #[[MAP]]],
     // CHECK-SAME: iterator_types = ["parallel"]} ins(%[[ARG]] : tensor<4xf32>)
     %1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
@@ -31,7 +31,7 @@
 // CHECK: func @inline_oned(%[[ARG:.*]]: tensor<4xf32>, %[[SCALAR:.*]]: tensor<1xf32>)
 func.func @inline_oned(%arg0: tensor<4xf32>, %scalar: tensor<1xf32>) -> tensor<4xf32> {
     // CHECK: %[[ZERO:.*]] = arith.constant 0 : index
-    %0 = linalg.init_tensor [4] : tensor<4xf32>
+    %0 = tensor.empty() : tensor<4xf32>
     // CHECK: linalg.generic {indexing_maps = [#[[MAP]], #[[MAP]]],
     // CHECK-SAME: iterator_types = ["parallel"]} ins(%[[ARG]] : tensor<4xf32>)
     %1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -323,36 +323,9 @@
 
 // -----
 
-func.func @init_tensor_err(%arg0 : index, %arg1 : index)
-{
-  // expected-error @+1 {{specified type 'tensor<4x?x?x5xf32>' does not match the inferred type 'tensor<4x5x?x?xf32>'}}
-  %1 = linalg.init_tensor [4, 5, %arg0, %arg1] : tensor<4x?x?x5xf32>
-  return
-}
-
-// -----
-
-func.func @init_tensor_err(%arg0 : index)
-{
-  // expected-error @+1 {{expected 4 sizes values}}
-  %1 = linalg.init_tensor [4, 5, %arg0] : tensor<4x?x?x5xf32>
-  return
-}
-
-// -----
-
-func.func @init_tensor_err(%arg0 : index)
-{
-  // expected-error @+1 {{expected 2 dynamic sizes values}}
-  %1 = "linalg.init_tensor"(%arg0) {static_sizes = [4, -1, -1, 5]} : (index) -> tensor<4x?x?x5xf32>
-  return
-}
-
-// -----
-
 func.func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32)
 {
-  %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
+  %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
   // expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}}
   linalg.fill ins(%arg2 : f32) outs(%0 : tensor<?x?xf32>)
 }
diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
@@ -52,7 +52,7 @@
 
 // CHECK:      %[[ARG0:[a-zA-Z0-9_]+]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32>
 // CHECK:      %[[CTE:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK:      %[[TMP:.+]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
+// CHECK:      %[[TMP:.+]] = tensor.empty() : tensor<1x32x32x1xf32>
 // CHECK:      %[[R1c:.+]] = linalg.fill
 // CHECK:      %[[R2c:.+]] = linalg.generic
 // CHECK-SAME:   indexing_maps = [#[[$MAP4]], #[[$MAP5]]]
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL: func @depthwise_conv_1d_nwc_wcm
 func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> {
   %zero = arith.constant 0.000000e+00 : f32
-  %init = linalg.init_tensor [1, 10, 8, 8] : tensor<1x10x8x8xf32>
+  %init = tensor.empty() : tensor<1x10x8x8xf32>
   %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
   // CHECK: depthwise_conv_1d_nwc_wcm
   %0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
@@ -17,7 +17,7 @@
 // CHECK-LABEL: func @depthwise_conv_1d_nwc_wc
 func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> {
   %zero = arith.constant 0.000000e+00 : f32
-  %init = linalg.init_tensor [1, 10, 8] : tensor<1x10x8xf32>
+  %init = tensor.empty() : tensor<1x10x8xf32>
   %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
   // CHECK: depthwise_conv_1d_nwc_wc
   %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
@@ -31,7 +31,7 @@
 // CHECK-LABEL: func @depthwise_conv_2d_nhwc_hwcm_tensor
 func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
   %zero = arith.constant 0.000000e+00 : f32
-  %init = linalg.init_tensor [2, 3, 4, 2, 3] : tensor<2x3x4x2x3xf32>
+  %init = tensor.empty() : tensor<2x3x4x2x3xf32>
   %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
   // CHECK:      %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
   // CHECK-SAME:   {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -59,7 +59,7 @@
 
 // CHECK-LABEL: func @depthwise_conv_1d_nw_tensor
 func.func @depthwise_conv_1d_nw_tensor(%input: tensor<1x113x96xf32>, %filter: tensor<3x96xf32>) -> tensor<1x56x96xf32> {
-  %init = linalg.init_tensor [1, 56, 96] : tensor<1x56x96xf32>
+  %init = tensor.empty() : tensor<1x56x96xf32>
   // CHECK:      %{{.+}} = linalg.depthwise_conv_1d_nw
   // CHECK-SAME:   {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>)
@@ -72,7 +72,7 @@
 
 // CHECK-LABEL: func @depthwise_conv_2d_nhwc_hwc_tensor
 func.func @depthwise_conv_2d_nhwc_hwc_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> {
-  %init = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
+  %init = tensor.empty() : tensor<1x56x56x96xf32>
   // CHECK:      %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc
   // CHECK-SAME:   {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
@@ -97,7 +97,7 @@
 
 // CHECK-LABEL: func @depthwise_conv_2d_nchw_chw_tensor
 func.func @depthwise_conv_2d_nchw_chw_tensor(%input: tensor<1x96x113x113xf32>, %filter: tensor<96x3x3xf32>) -> tensor<1x96x56x56xf32> {
-  %init = linalg.init_tensor [1, 96, 56, 56] : tensor<1x96x56x56xf32>
+  %init = tensor.empty() : tensor<1x96x56x56xf32>
   // CHECK:      %{{.+}} = linalg.depthwise_conv_2d_nchw_chw
   // CHECK-SAME:   {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
@@ -122,7 +122,7 @@
 
 func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> {
   %zero = arith.constant 0.000000e+00 : f32
-  %init = linalg.init_tensor [2, 6, 7, 2, 3] : tensor<2x6x7x2x3xf32>
+  %init = tensor.empty() : tensor<2x6x7x2x3xf32>
   %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
   // CHECK:      %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
   // CHECK-SAME:   {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -186,7 +186,7 @@
 // CHECK-LABEL: func @depthwise_conv_3d_ndhwc_dhwcm
 func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> {
   %zero = arith.constant 0.000000e+00 : f32
-  %init = linalg.init_tensor [2, 3, 13, 4, 6, 6] : tensor<2x3x13x4x6x6xf32>
+  %init = tensor.empty() : tensor<2x3x13x4x6x6xf32>
   %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
   // CHECK: depthwise_conv_3d_ndhwc_dhwcm
   %0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
@@ -200,7 +200,7 @@
 // CHECK-LABEL: func @depthwise_conv_3d_ndhwc_dhwc
 func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> {
   %zero = arith.constant 0.000000e+00 : f32
-  %init = linalg.init_tensor [2, 3, 13, 4, 6] : tensor<2x3x13x4x6xf32>
+  %init = tensor.empty() : tensor<2x3x13x4x6xf32>
   %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
   // CHECK: depthwise_conv_3d_ndhwc_dhwc
   %0 = linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
@@ -410,8 +410,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
-  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
+  %fake = tensor.empty() : tensor<3x3xf32>
+  %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -444,8 +444,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
-  %init = linalg.init_tensor [1, 1, 2, 2] : tensor<1x1x2x2xf32>
+  %fake = tensor.empty() : tensor<3x3xf32>
+  %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -478,8 +478,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
-  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
+  %fake = tensor.empty() : tensor<3x3xf32>
+  %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -497,8 +497,8 @@
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 
 func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
-  %init = linalg.init_tensor [1, 1, 2, 2] : tensor<1x1x2x2xf32>
+  %fake = tensor.empty() : tensor<3x3xf32>
+  %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -531,8 +531,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
 func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xi8>
-  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi8>
+  %fake = tensor.empty() : tensor<3x3xi8>
+  %init = tensor.empty() : tensor<1x2x2x1xi8>
   %cst = arith.constant 0 : i8
   %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -565,8 +565,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
 func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xi16>
-  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi16>
+  %fake = tensor.empty() : tensor<3x3xi16>
+  %init = tensor.empty() : tensor<1x2x2x1xi16>
   %cst = arith.constant 0 : i16
   %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -599,8 +599,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
 func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xi32>
-  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi32>
+  %fake = tensor.empty() : tensor<3x3xi32>
+  %init = tensor.empty() : tensor<1x2x2x1xi32>
   %cst = arith.constant 0 : i32
   %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -634,8 +634,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
-  %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
-  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
+  %fake = tensor.empty() : tensor<3x3xf32>
+  %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
@@ -668,8 +668,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
-  %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
-  %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+  %fake = tensor.empty() : tensor<3x3x3xf32>
+  %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
@@ -702,8 +702,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
-  %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
-  %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+  %fake = tensor.empty() : tensor<3x3x3xf32>
+  %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
@@ -736,8 +736,8 @@
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
 // CHECK-SAME:      outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
-  %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
-  %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+  %fake = tensor.empty() : tensor<3x3x3xf32>
+  %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
 
 //===----------------------------------------------------------------------===//
-// InitTensorOp elimination
+// AllocTensorOp elimination
 //===----------------------------------------------------------------------===//
 
 // CHECK-LABEL: func @buffer_forwarding_conflict
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -6,7 +6,7 @@
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
-  %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]} 
@@ -37,7 +37,7 @@
 //  CHECK-DAG:   %[[TARGET_D0:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[SOURCE_D0]]]
 //  CHECK-DAG:   %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 //  CHECK-DAG:   %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%[[TARGET_D0]], %[[TARGET_D1]]] 
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]]) 
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
 //  CHECK-DAG:   %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
 //  CHECK-DAG:   %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
@@ -55,7 +55,7 @@
     %arg3 : f32) -> tensor<49x?xf32> {
   %c0 = arith.constant 0 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x42xf32>
-  %init = linalg.init_tensor [42, %d0] : tensor<42x?xf32>
+  %init = tensor.empty(%d0) : tensor<42x?xf32>
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
     iterator_types = ["parallel", "parallel"]} 
@@ -81,7 +81,7 @@
 //  CHECK-DAG:   %[[SOURCE:.+]] = linalg.generic
 //  CHECK-DAG:   %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 //  CHECK-DAG:   %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]], %[[SOURCE_D1]]]
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [49, %[[TARGET_D1]]] 
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[TARGET_D1]])
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}outs(%[[INIT]]
 //  CHECK-DAG:   %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 //      CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
diff --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -6,7 +6,7 @@
   %0 = tensor.collapse_shape %arg0 [[0, 1], [2]] : tensor<?x?x?xf32> into tensor<?x?xf32>
   %d0 = tensor.dim %0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %0, %c1 : tensor<?x?xf32>
-  %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %1 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
@@ -40,7 +40,7 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %arg0, %c1 : tensor<1x?x?xf32>
   %d1 = tensor.dim %arg1, %c2 : tensor<1x?x?xf32>
-  %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %fill = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
@@ -142,7 +142,7 @@
 func.func @generic_op_reshape_consumer_static(%arg0: tensor<264x4xf32>)
                                             -> tensor<8x33x4xf32> {
   %cst = arith.constant dense<2.000000e+00> : tensor<264x4xf32>
-  %0 = linalg.init_tensor [264, 4] : tensor<264x4xf32>
+  %0 = tensor.empty() : tensor<264x4xf32>
   %1 = linalg.generic {
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
@@ -162,7 +162,7 @@
 // CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<264x4xf32>
 //  CHECK-DAG:   %[[CST:.+]] = arith.constant
 // CHECK-SAME:     : tensor<8x33x4xf32>
-//  CHECK-DAG:   %[[INIT:.+]] = linalg.init_tensor [264, 4]
+//  CHECK-DAG:   %[[INIT:.+]] = tensor.empty()
 //      CHECK:   %[[T0:.+]] = tensor.expand_shape %[[ARG0]]
 // CHECK-SAME:     [0, 1], [2]
 // CHECK-SAME:     tensor<264x4xf32> into tensor<8x33x4xf32>
@@ -281,7 +281,7 @@
 func.func @reshape_as_consumer_permutation
   (%a : tensor<210x6x4xi32>, %b : tensor<210x4xi32>)
     -> tensor<2x3x4x5x6x7xi32> {
-  %shape = linalg.init_tensor [6, 4, 210] : tensor<6x4x210xi32>
+  %shape = tensor.empty() : tensor<6x4x210xi32>
   %c = linalg.generic {
          indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>,
                           affine_map<(d0, d1, d2) -> (d1, d2)>,
@@ -318,7 +318,7 @@
 //       CHECK: func @reshape_as_consumer_permutation
 //  CHECK-SAME:   %[[ARG0:.+]]: tensor<210x6x4xi32>
 //  CHECK-SAME:   %[[ARG1:.+]]: tensor<210x4xi32>
-//   CHECK-DAG:   %[[INIT:.+]] = linalg.init_tensor [6, 4, 210]
+//   CHECK-DAG:   %[[INIT:.+]] = tensor.empty()
 //   CHECK-DAG:   %[[T1:.+]] = tensor.expand_shape %[[ARG0]]
 //  CHECK-SAME:     [0, 1, 2], [3, 4], [5]
 //   CHECK-DAG:   %[[T2:.+]] = tensor.expand_shape %[[ARG1]]
@@ -455,7 +455,7 @@
   %c0 = arith.constant 0 : index
   %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<?x?xf32> into tensor<?xf32>
   %1 = tensor.dim %0, %c0 : tensor<?xf32>
-  %2 = linalg.init_tensor [%1] : tensor<?xf32>
+  %2 = tensor.empty(%1) : tensor<?xf32>
   %3 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
@@ -477,7 +477,7 @@
 
 func.func @no_fuse_mismatched_dynamism(%arg0: tensor<2x1xi64>, %arg1: tensor<?xi64>) -> tensor<2xi64> {
   %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x1xi64> into tensor<2xi64>
-  %1 = linalg.init_tensor [2] : tensor<2xi64>
+  %1 = tensor.empty() : tensor<2xi64>
   %2 = linalg.generic
     {indexing_maps = [affine_map<(d0) -> (d0)>,
                       affine_map<(d0) -> (d0)>,
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -1,42 +1,42 @@
 // RUN: mlir-opt -resolve-shaped-type-result-dims -split-input-file %s | FileCheck %s
 
-func.func @init_tensor_static_dim() -> (index, index) {
+func.func @empty_tensor_static_dim() -> (index, index) {
   %c0 = arith.constant 0 : index
   %c2 = arith.constant 2 : index
   %c6 = arith.constant 6 : index
-  %0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32>
+  %0 = tensor.empty(%c6) : tensor<4x5x?xf32>
   %1 = tensor.dim %0, %c2 : tensor<4x5x?xf32>
   %2 = tensor.dim %0, %c0 : tensor<4x5x?xf32>
   return %1, %2 : index, index
 }
-//      CHECK: func @init_tensor_static_dim
+//      CHECK: func @empty_tensor_static_dim
 //  CHECK-DAG:   %[[C4:.+]] = arith.constant 4 : index
 //  CHECK-DAG:   %[[C6:.+]] = arith.constant 6 : index
 //      CHECK:   return %[[C6]], %[[C4]]
 
 // -----
 
-func.func @init_tensor_dynamic_dim(%arg0 : index) -> (index) {
+func.func @empty_tensor_dynamic_dim(%arg0 : index) -> (index) {
   %c2 = arith.constant 2 : index
-  %0 = linalg.init_tensor [4, 5, %arg0] : tensor<4x5x?xf32>
+  %0 = tensor.empty(%arg0) : tensor<4x5x?xf32>
   %1 = tensor.dim %0, %c2 : tensor<4x5x?xf32>
   return %1 : index
 }
-//      CHECK: func @init_tensor_dynamic_dim
+//      CHECK: func @empty_tensor_dynamic_dim
 // CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 //      CHECK:   return %[[ARG0]]
 
 // -----
 
-func.func @init_tensor_dynamic_dim2(%arg0 : index, %arg1 : index) -> (index, index) {
+func.func @empty_tensor_dynamic_dim2(%arg0 : index, %arg1 : index) -> (index, index) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
-  %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
+  %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
   %1 = tensor.dim %0, %c0 : tensor<?x?xf32>
   %2 = tensor.dim %0, %c1 : tensor<?x?xf32>
   return %1, %2 : index, index
 }
-//      CHECK: func @init_tensor_dynamic_dim2
+//      CHECK: func @empty_tensor_dynamic_dim2
 // CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //      CHECK:   return %[[ARG0]], %[[ARG1]]
@@ -87,7 +87,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?xf32>
-  %0 = linalg.init_tensor [%d0, %arg1] : tensor<?x?xf32>
+  %0 = tensor.empty(%d0, %arg1) : tensor<?x?xf32>
   %1 = linalg.generic
     {indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                       affine_map<(d0, d1) -> (d0, d1)>],
@@ -149,7 +149,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?xf32>
-  %0 = linalg.init_tensor [%d0, %arg1] : tensor<?x?xf32>
+  %0 = tensor.empty(%d0, %arg1) : tensor<?x?xf32>
   %1 = linalg.generic
     {indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                       affine_map<(d0, d1) -> (d0, d1)>],
@@ -173,7 +173,7 @@
 
 #map = affine_map<(d0) -> (d0)>
 
-func.func @init_tensor_dim_of_linalg_result(%arg_0 : tensor<?xf32>,
+func.func @empty_tensor_dim_of_linalg_result(%arg_0 : tensor<?xf32>,
     %arg_1: tensor<?xf32>) -> (index, index) {
   %0, %1 = linalg.generic {
     indexing_maps = [#map, #map, #map],
@@ -190,7 +190,7 @@
   %num_elem_1 = tensor.dim %1, %c0 : tensor<?xf32>
   return %num_elem_0, %num_elem_1 : index, index
 }
-//      CHECK: func @init_tensor_dim_of_linalg_result(
+//      CHECK: func @empty_tensor_dim_of_linalg_result(
 // CHECK-SAME:   %[[ARG_0:[a-zA-Z0-9_]+]]: tensor<?xf32>
 // CHECK-SAME:   %[[ARG_1:[a-zA-Z0-9_]+]]: tensor<?xf32>)
 //      CHECK:   %[[R0:.+]] = tensor.dim %[[ARG_0]]
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -202,9 +202,9 @@
     %arg0: tensor<?xi32>, %arg1: tensor<?xi32>, %arg2: i32)
     -> (tensor<i32>, tensor<i32>) {
   %c0 = arith.constant 0 : index
-  %0 = linalg.init_tensor [] : tensor<i32>
+  %0 = tensor.empty() : tensor<i32>
   %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor<i32>) -> tensor<i32>
-  %2 = linalg.init_tensor [] : tensor<i32>
+  %2 = tensor.empty() : tensor<i32>
   %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
   %4:2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
@@ -324,23 +324,8 @@
 
 // -----
 
-#attr = {"foo"}
-func.func @init_tensor(%arg0 : index, %arg1 : index)
-{
-  %0 = linalg.init_tensor [3, 42] : tensor<3x42xf32>
-  %1 = linalg.init_tensor [4, %arg0, %arg1, 5] : tensor<4x?x?x5xf32>
-  %2 = linalg.init_tensor [2, 2] : tensor<2x2xf32, #attr>
-  return
-}
-// CHECK-LABEL: func @init_tensor
-//       CHECK:   linalg.init_tensor [3, 42] : tensor<3x42xf32>
-//       CHECK:   linalg.init_tensor [4, %{{.*}}, %{{.*}}, 5] : tensor<4x?x?x5xf32>
-//       CHECK:   linalg.init_tensor [2, 2] : tensor<2x2xf32, {foo}>
-
-// -----
-
 func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor<?x?xf32> {
-  %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
+  %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
   %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/split_reduction.mlir b/mlir/test/Dialect/Linalg/split_reduction.mlir
--- a/mlir/test/Dialect/Linalg/split_reduction.mlir
+++ b/mlir/test/Dialect/Linalg/split_reduction.mlir
@@ -16,7 +16,7 @@
 //  CHECK-DAG: %[[ID:.*]] = arith.constant 0.000000e+00 : f32
 //  CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
 //  CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
-//  CHECK-DAG: %[[INI:.*]] = linalg.init_tensor [16, 32, 4] : tensor<16x32x4xf32>
+//  CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
 //      CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 //      CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME:   , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
@@ -41,7 +41,7 @@
 //  INNERPARALLELCHECK-DAG: %[[ID:.*]] = arith.constant 0.000000e+00 : f32
 //  INNERPARALLELCHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
 //  INNERPARALLELCHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
-//  INNERPARALLELCHECK-DAG: %[[INI:.*]] = linalg.init_tensor [16, 32, 4] : tensor<16x32x4xf32>
+//  INNERPARALLELCHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
 //      INNERPARALLELCHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 //      INNERPARALLELCHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // INNERPARALLELCHECK-SAME:   , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
@@ -83,7 +83,7 @@
 //CHECK-LABEL: @generic_split_1d
 //      CHECK: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 //      CHECK: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
-//      CHECK: %[[INI:.*]] = linalg.init_tensor [4] : tensor<4xf32>
+//      CHECK: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
 //      CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 //      CHECK: %[[G:.*]] = linalg.generic
 //      CHECK:   {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
@@ -107,7 +107,7 @@
 //INNERPARALLELCHECK-LABEL: @generic_split_1d
 //      INNERPARALLELCHECK: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 //      INNERPARALLELCHECK: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
-//      INNERPARALLELCHECK: %[[INI:.*]] = linalg.init_tensor [4] : tensor<4xf32>
+//      INNERPARALLELCHECK: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
 //      INNERPARALLELCHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 //      INNERPARALLELCHECK: %[[G:.*]] = linalg.generic
 //      INNERPARALLELCHECK:   {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
@@ -153,7 +153,7 @@
 //      CHECK: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32
 //  CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
 //  CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
-//      CHECK: %[[INI:.*]] = linalg.init_tensor [5, 2, 4] : tensor<5x2x4xf32>
+//      CHECK: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
 //      CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 //      CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
 // CHECK-SAME:   ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
@@ -177,7 +177,7 @@
 //      INNERPARALLELCHECK: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32
 //  INNERPARALLELCHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
 //  INNERPARALLELCHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
-//      INNERPARALLELCHECK: %[[INI:.*]] = linalg.init_tensor [5, 2, 4] : tensor<5x2x4xf32>
+//      INNERPARALLELCHECK: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
 //      INNERPARALLELCHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 //      INNERPARALLELCHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
 // INNERPARALLELCHECK-SAME:   ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -65,7 +65,7 @@
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
 
-  %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+  %init = tensor.empty() : tensor<1x112x112x32xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
 
   %conv = linalg.conv_2d_nhwc_hwcf
@@ -109,7 +109,7 @@
 //      CHECK: func @conv_tensors_static
 // CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)
 
-//      CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+//      CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
 // CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
 
 // CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
@@ -147,7 +147,7 @@
   %ow = tensor.dim %elementwise, %c2 : tensor<?x?x?x?xf32>
   %oc = tensor.dim %elementwise, %c3 : tensor<?x?x?x?xf32>
 
-  %init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor<?x?x?x?xf32>
+  %init = tensor.empty(%n, %oh, %ow, %oc) : tensor<?x?x?x?xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
 
   %conv = linalg.conv_2d_nhwc_hwcf
@@ -216,7 +216,7 @@
 //  CHECK-DAG:   %[[ELEM_OW:.+]] = tensor.dim %[[ELEM]], %[[C2]] : tensor<?x?x?x?xf32>
 //  CHECK-DAG:   %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor<?x?x?x?xf32>
 
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]] : tensor<?x?x?x?xf32>
+//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor<?x?x?x?xf32>
 //      CHECK:   %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
 
 //  CHECK-DAG:   %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir
@@ -47,7 +47,7 @@
 //     CHECK-PEEL-12:   }
 func.func @matmul_static_tensor(%arg0: tensor<1500x1600xf32>, %arg1: tensor<1600x1700xf32>)
     -> tensor<1500x1700xf32> {
-  %out = linalg.init_tensor [1500, 1700] : tensor<1500x1700xf32>
+  %out = tensor.empty() : tensor<1500x1700xf32>
   %r = linalg.matmul {__internal_linalg_transform__ = "tile"}
       ins(%arg0, %arg1: tensor<1500x1600xf32>, tensor<1600x1700xf32>)
       outs(%out: tensor<1500x1700xf32>) -> tensor<1500x1700xf32>
@@ -102,7 +102,7 @@
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
-  %out = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %out = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %r = linalg.matmul {__internal_linalg_transform__ = "tile"}
       ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
       outs(%out: tensor<?x?xf32>) -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
--- a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
@@ -14,7 +14,7 @@
 //  CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y
 //  CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
 //  CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x
-//  CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor
+//  CHECK-DAG: %[[INIT:.+]] = tensor.empty
 //      CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]]
 //      CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
 //      CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]]
@@ -43,7 +43,7 @@
   %cst = arith.constant 0.0 : f32
   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
-  %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
   %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %4 = linalg.matmul {__internal_linalg_transform__ = "tensors_fuse_distribute1"}
        ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
--- a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
+++ b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
@@ -19,7 +19,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
-  %out = linalg.init_tensor [%d0, 2000] : tensor<?x2000xf32>
+  %out = tensor.empty(%d0) : tensor<?x2000xf32>
   %r = linalg.matmul {__internal_linalg_transform__ = "tile"}
       ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x2000xf32>)
       outs(%out: tensor<?x2000xf32>) -> tensor<?x2000xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -37,7 +37,7 @@
   %0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
   %1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
   %2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
-  %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+  %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
   %4 = linalg.generic
     {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                       affine_map<(d0, d1, d2) -> (d0, d2, d1)>,
@@ -55,7 +55,7 @@
 // CHECK-LABEL: func @generic_op_tensors
 //  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
 //  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-//       CHECK:   %[[INIT:.+]] = linalg.init_tensor
+//       CHECK:   %[[INIT:.+]] = tensor.empty
 //       CHECK:   %[[TD0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC0:.+]] = %[[INIT]]) -> (tensor<?x?x?xf32>) {
 //       CHECK:     %[[TD1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC1:.+]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
 //       CHECK:       %[[TD2:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC2:.+]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -40,8 +40,8 @@
 // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x113x96xf32>
 // CHECK-SAME: %[[ARG1:.+]]: tensor<1x3x96xf32>
 func.func @depthwise_conv_2d_nhwc_hwc(%input: tensor<1x1x113x96xf32>, %filter: tensor<1x3x96xf32>) -> tensor<1x1x56x96xf32> {
-  // CHECK: %[[RES:.+]] = linalg.init_tensor
-  %init = linalg.init_tensor [1, 1, 56, 96] : tensor<1x1x56x96xf32>
+  // CHECK: %[[RES:.+]] = tensor.empty
+  %init = tensor.empty() : tensor<1x1x56x96xf32>
   // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
   // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
   // CHECK: %[[SLICERES:.+]] = tensor.extract_slice %[[RES]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -66,12 +66,12 @@
   //  CHECK-SAME:   %[[IN:[0-9a-z]+]]: tensor<64xf32>
   //  CHECK-SAME:   %[[OUT:[0-9a-z]+]]: tensor<64xf32>
   func.func @fuse_untileable_op(%arg0: index, %arg1: tensor<64xf32>, %arg2: tensor<64xf32>) -> tensor<64xf32> {
-    %0 = linalg.init_tensor [%arg0] : tensor<?xf32>
+    %0 = tensor.empty(%arg0) : tensor<?xf32>
     %1 = affine.apply #map0()[%arg0]
 
     // CHECK: scf.foreach_thread {{.*}} {
     %2 = scf.foreach_thread (%arg3) in (%1) shared_outs(%o = %arg2) -> (tensor<64xf32>) {
-      // CHECK: %[[INIT_TENSOR:.*]] = linalg.init_tensor
+      // CHECK: %[[INIT_TENSOR:.*]] = tensor.empty
       %3 = affine.apply #map1(%arg3)[%arg0]
       %4 = affine.min #map2(%arg3)[%arg0]
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor<?xf32>
@@ -91,10 +91,10 @@
   ^bb0(%arg0: !pdl.operation):
     transform.sequence %arg0 failures(propagate) {
     ^bb1(%arg1: !pdl.operation):
-      %0 = transform.structured.match ops{["linalg.init_tensor"]} in %arg1
+      %0 = transform.structured.match ops{["tensor.empty"]} in %arg1
       %1 = transform.structured.match ops{["scf.foreach_thread"]} in %arg1
 
-      // linalg.init_tensor is not tileable. The op is cloned and fused.
+      // tensor.empty is not tileable. The op is cloned and fused.
       transform.structured.fuse_into_containing_op %0 into %1
     }
   }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -59,9 +59,9 @@
 //  CHECK-SAME: (%[[INPUT:.+]]: tensor<12x7x25xf32>)
 func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf32> {
   %five = arith.constant 5.0 : f32
-  %init = linalg.init_tensor [12, 25] : tensor<12x25xf32>
+  %init = tensor.empty() : tensor<12x25xf32>
 
-//       CHECK: %[[INIT:.+]] = linalg.init_tensor [12, 25]
+//       CHECK: %[[INIT:.+]] = tensor.empty()
 //   CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
 //   CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
 //       CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
diff --git a/mlir/test/Dialect/Linalg/transform-op-match.mlir b/mlir/test/Dialect/Linalg/transform-op-match.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-match.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-match.mlir
@@ -49,7 +49,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
 func.func @match_complex_attribute(%arg0: tensor<12x128x32xf32>)
     -> tensor<128x12x32xf32> {
-  %0 = linalg.init_tensor [128, 12, 32] : tensor<128x12x32xf32>
+  %0 = tensor.empty() : tensor<128x12x32xf32>
   // expected-remark @below {{matched complex attr}}
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -182,7 +182,7 @@
 func.func @generic_interchanged_transpose(%arg0: tensor<12x128x32xf32>) -> tensor<128x12x32xf32> {
   // CHECK: %[[IN:.+]] = vector.transfer_read
   // CHECK: vector.transfer_write %[[IN]], {{.+}} permutation_map = #[[MAP]]
-  %0 = linalg.init_tensor [128, 12, 32] : tensor<128x12x32xf32>
+  %0 = tensor.empty() : tensor<128x12x32xf32>
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
@@ -786,7 +786,7 @@
 //   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
-//   CHECK-DAG:   %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
+//   CHECK-DAG:   %[[INIT:.*]] = tensor.empty() : tensor<2x3x4xf32>
 //   CHECK-DAG:   %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32>
 //       CHECK:   %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32>
 //       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
@@ -818,7 +818,7 @@
 //   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
-//       CHECK:   %[[INIT:.*]] = linalg.init_tensor [2, 6, 4] : tensor<2x6x4xf32>
+//       CHECK:   %[[INIT:.*]] = tensor.empty() : tensor<2x6x4xf32>
 //       CHECK:   %[[VEC:.*]] =  vector.broadcast %[[PAD]] : f32 to vector<2x6x4xf32>
 //       CHECK:   %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<2x6x4xf32>, tensor<2x6x4xf32>
 //       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : tensor<2x5x2xf32>, vector<2x5x2xf32>
@@ -858,7 +858,7 @@
 //       CHECK:   %[[DIM3:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
 //       CHECK:   %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index
 //       CHECK:   %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index
-//       CHECK:   %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
+//       CHECK:   %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32>
 //       CHECK:   %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
 //       CHECK:   %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
 //       CHECK:   %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
@@ -1192,11 +1192,11 @@
 // CHECK-LABEL:   func @red_max_2d(
 func.func @red_max_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
   // CHECK: %[[CMINF:.+]] = arith.constant dense<-3.402820e+38> : vector<4xf32>
-  // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+  // CHECK: tensor.empty() : tensor<4xf32>
   // CHECK: vector.multi_reduction <maxf>, {{.*}}, %[[CMINF]] [1] : vector<4x4xf32> to vector<4xf32>
   // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %ident = arith.constant -3.40282e+38 : f32
-  %init = linalg.init_tensor [4] : tensor<4xf32>
+  %init = tensor.empty() : tensor<4xf32>
   %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
@@ -1225,12 +1225,12 @@
 // CHECK-LABEL:   func @red_min_2d(
 func.func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
   // CHECK: %[[CMAXF:.+]] = arith.constant dense<3.402820e+38> : vector<4xf32>
-  // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+  // CHECK: tensor.empty() : tensor<4xf32>
   // CHECK: vector.transfer_read {{.*}} : tensor<4x4xf32>, vector<4x4xf32>
   // CHECK: vector.multi_reduction <minf>, {{.*}}, %[[CMAXF]] [1] : vector<4x4xf32> to vector<4xf32>
   // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %maxf32 = arith.constant 3.40282e+38 : f32
-  %init = linalg.init_tensor [4] : tensor<4xf32>
+  %init = tensor.empty() : tensor<4xf32>
   %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
@@ -1258,12 +1258,12 @@
 
 // CHECK-LABEL:   func @red_mul_2d(
 func.func @red_mul_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
-  // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+  // CHECK: tensor.empty() : tensor<4xf32>
   // CHECK: vector.transfer_read {{.*}} : tensor<4x4xf32>, vector<4x4xf32>
   // CHECK: vector.multi_reduction <mul>, {{.*}}, {{.*}} [1] : vector<4x4xf32> to vector<4xf32>
   // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %ident = arith.constant 1.0 : f32
-  %init = linalg.init_tensor [4] : tensor<4xf32>
+  %init = tensor.empty() : tensor<4xf32>
   %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
@@ -1291,12 +1291,12 @@
 
 // CHECK-LABEL:   func @red_or_2d(
 func.func @red_or_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
-  // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+  // CHECK: tensor.empty() : tensor<4xi1>
   // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
   // CHECK: vector.multi_reduction <or>, {{.*}}, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
   // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant false
-  %init = linalg.init_tensor [4] : tensor<4xi1>
+  %init = tensor.empty() : tensor<4xi1>
   %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
@@ -1324,12 +1324,12 @@
 
 // CHECK-LABEL:   func @red_and_2d(
 func.func @red_and_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
-  // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+  // CHECK: tensor.empty() : tensor<4xi1>
   // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
   // CHECK: vector.multi_reduction <and>, {{.*}}, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
   // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant true
-  %init = linalg.init_tensor [4] : tensor<4xi1>
+  %init = tensor.empty() : tensor<4xi1>
   %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
@@ -1357,12 +1357,12 @@
 
 // CHECK-LABEL:   func @red_xor_2d(
 func.func @red_xor_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
-  // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+  // CHECK: tensor.empty() : tensor<4xi1>
   // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
   // CHECK: vector.multi_reduction <xor>, {{.*}}, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
   // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant false
-  %init = linalg.init_tensor [4] : tensor<4xi1>
+  %init = tensor.empty() : tensor<4xi1>
   %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
@@ -1397,7 +1397,7 @@
   // CHECK: subf {{.*}} : vector<4x4xf32>
   // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
   %c0 = arith.constant 0.0 : f32
-  %init = linalg.init_tensor [4, 4] : tensor<4x4xf32>
+  %init = tensor.empty() : tensor<4x4xf32>
   %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0, 0)>,
@@ -1436,7 +1436,7 @@
   // CHECK: vector.multi_reduction <add>, {{.*}}, {{.*}} : vector<4x4xf32> to vector<4xf32>
   // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
   %c0 = arith.constant 0.0 : f32
-  %init = linalg.init_tensor [4] : tensor<4xf32>
+  %init = tensor.empty() : tensor<4xf32>
   %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0, 0)>,
@@ -1478,8 +1478,8 @@
   //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
   %f0 = arith.constant 0.000000e+00 : f32
 
-  //      CHECK: %[[init:.*]] = linalg.init_tensor [] : tensor<f32>
-  %0 = linalg.init_tensor [] : tensor<f32>
+  //      CHECK: %[[init:.*]] = tensor.empty() : tensor<f32>
+  %0 = tensor.empty() : tensor<f32>
 
   %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<f32>) -> tensor<f32>
   //      CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
@@ -1524,7 +1524,7 @@
 // CHECK-LABEL:   func @not_projected_permutation
 func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> {
   %c0 = arith.constant 0.0 : f32
-  %init = linalg.init_tensor [6, 6, 3, 3] : tensor<6x6x3x3xf32>
+  %init = tensor.empty() : tensor<6x6x3x3xf32>
   %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
   // CHECK: linalg.generic
   %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>,
diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
@@ -49,7 +49,7 @@
 // CHECK:           %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
 // CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK:           %[[VAL_INITTENSOR:.*]] = linalg.init_tensor [32] : tensor<32xf32>
+// CHECK:           %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32>
 // CHECK:           %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref<?xf32>
 // CHECK:           %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32>
 // CHECK:           linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
@@ -62,7 +62,7 @@
 // CHECK:           return %[[VAL_11]] : tensor<32xf32>
 // CHECK:         }
 func.func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> {
-  %u = linalg.init_tensor [32] : tensor<32xf32>
+  %u = tensor.empty() : tensor<32xf32>
   %0 = linalg.generic #trait1
      ins(%arga: tensor<32xf32, #DV>)
     outs(%u: tensor<32xf32>) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
@@ -27,7 +27,7 @@
 // CHECK:       }
 func.func @fold_yield_arg_zero() -> tensor<1024x1024xf64> {
   %cst = arith.constant 0.000000e+00 : f64
-  %0 = linalg.init_tensor [1024, 1024] : tensor<1024x1024xf64>
+  %0 = tensor.empty() : tensor<1024x1024xf64>
   %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>,
                                         affine_map<(d0, d1) -> (d0, d1)>],
                                         iterator_types = ["parallel", "parallel"]}
@@ -46,7 +46,7 @@
 // CHECK:       }
 func.func @fold_yield_direct_zero() -> tensor<32xf64> {
   %cst = arith.constant 0.000000e+00 : f64
-  %0 = linalg.init_tensor [32] : tensor<32xf64>
+  %0 = tensor.empty() : tensor<32xf64>
   %1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>],
                                         iterator_types = ["parallel"]}
                                         outs(%0 : tensor<32xf64>) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
@@ -32,7 +32,7 @@
 // CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
 // CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
 // CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
-// CHECK-DAG:       %[[VAL_10a:.*]] = linalg.init_tensor [8] : tensor<8xi64>
+// CHECK-DAG:       %[[VAL_10a:.*]] = tensor.empty() : tensor<8xi64>
 // CHECK-DAG:       %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_10a]] : memref<8xi64>
 // CHECK-DAG:       linalg.fill ins(%[[VAL_5]] : i64) outs(%[[VAL_10]] : memref<8xi64>)
 // CHECK-DAG:       %[[VAL_11:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
@@ -50,7 +50,7 @@
 // CHECK:           return %[[VAL_20]] : tensor<8xi64>
 // CHECK:         }
 func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
-  %init = linalg.init_tensor [8] : tensor<8xi64>
+  %init = tensor.empty() : tensor<8xi64>
   %r = linalg.generic #trait_1d
       ins(%arga: tensor<8xi64, #SparseVector>)
      outs(%init: tensor<8xi64>) {
@@ -73,7 +73,7 @@
 // CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
 // CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
 // CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
-// CHECK-DAG:       %[[VAL_9a:.*]] = linalg.init_tensor [8] : tensor<8xi64>
+// CHECK-DAG:       %[[VAL_9a:.*]] = tensor.empty() : tensor<8xi64>
 // CHECK-DAG:       %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_9a]] : memref<8xi64>
 // CHECK-DAG:       linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_9]] : memref<8xi64>)
 // CHECK-DAG:       %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -112,7 +112,7 @@
 // CHECK:           return %[[VAL_35]] : tensor<8xi64>
 // CHECK:         }
 func.func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
-  %init = linalg.init_tensor [8] : tensor<8xi64>
+  %init = tensor.empty() : tensor<8xi64>
   %r = linalg.generic #trait_1d
       ins(%arga: tensor<8xi64, #SparseVector>)
      outs(%init: tensor<8xi64>) {
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -1523,3 +1523,108 @@
   %1 = tensor.insert_slice %0 into %input[%c0, 1, %c0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x2x4xf32> into tensor<1x2x2x4xf32>
   return %1: tensor<1x2x2x4xf32>
 }
+
+// -----
+
+func.func @empty_canonicalize() -> (tensor<4x5x?xf32>) {
+  %c6 = arith.constant 6 : index
+  %0 = tensor.empty(%c6) : tensor<4x5x?xf32>
+  return %0 : tensor<4x5x?xf32>
+}
+// CHECK: func @empty_canonicalize
+// CHECK:   %[[T0:.+]] = tensor.empty() : tensor<4x5x6xf32>
+// CHECK:   %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32>
+// CHECK:   return %[[T1]]
+
+// -----
+
+func.func @empty_reshape_expansion(%arg0 : index) -> tensor<2x3x5x4x?x7xf32> {
+  %0 = tensor.empty(%arg0) : tensor<6x5x?xf32>
+  %1 = tensor.expand_shape %0 [[0, 1], [2], [3, 4, 5]]
+      : tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32>
+  return %1 : tensor<2x3x5x4x?x7xf32>
+}
+//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)>
+//      CHECK: func @empty_reshape_expansion
+// CHECK-SAME:     %[[ARG0:.+]]: index
+// CHECK-NEXT:   %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
+// CHECK-NEXT:   %[[INIT:.+]] = tensor.empty(%[[D]])
+// CHECK-NEXT:   return %[[INIT]]
+
+// -----
+
+func.func @empty_reshape_collapse(%arg0 : index) -> tensor<6x5x?xf32> {
+  %0 = tensor.empty(%arg0) : tensor<2x3x5x4x?x7xf32>
+  %1 = tensor.collapse_shape %0 [[0, 1], [2], [3, 4, 5]]
+      : tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32>
+  return %1 : tensor<6x5x?xf32>
+}
+//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)>
+//      CHECK: func @empty_reshape_collapse
+// CHECK-SAME:     %[[ARG0:.+]]: index
+// CHECK-NEXT:   %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
+// CHECK-NEXT:   %[[INIT:.+]] = tensor.empty(%[[D]])
+// CHECK-NEXT:   return %[[INIT]]
+
+// -----
+
+func.func @fold_empty_tensor_with_slice
+  (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
+{
+  %0 = tensor.empty(%arg0) : tensor<?x10x40xf32>
+  %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
+    : tensor<?x10x40xf32> to tensor<5x?x20xf32>
+  return %1 : tensor<5x?x20xf32>
+}
+//      CHECK: func @fold_empty_tensor_with_slice
+// CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
+//      CHECK:   %[[T0:.+]] = tensor.empty(%[[ARG1]])
+//      CHECK:   return %[[T0]]
+
+// -----
+
+func.func @fold_empty_tensor_with_cast(%arg0 : index) -> tensor<1x12xf32> {
+  %0 = tensor.empty(%arg0) : tensor<?x12xf32>
+  %1 = tensor.cast %0 : tensor<?x12xf32> to tensor<1x12xf32>
+  return %1 : tensor<1x12xf32>
+}
+//      CHECK: func @fold_empty_tensor_with_cast(%[[ARG0:.+]]: index)
+//      CHECK:   %[[T0:.+]] = tensor.empty() : tensor<1x12xf32>
+//      CHECK:   return %[[T0]] : tensor<1x12xf32>
+
+// -----
+
+func.func private @some_use(%i : index, %j : index)
+
+// CHECK-LABEL: func @empty_tensor_canonicalize
+//  CHECK-SAME:   %[[I:.*]]: index
+func.func @empty_tensor_canonicalize(%i : index) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // CHECK-NOT: tensor.empty
+  %0 = tensor.empty(%i) : tensor<?x42xf32>
+
+  // CHECK-NOT: tensor.dim
+  %1 = tensor.dim %0, %c0: tensor<?x42xf32>
+  %2 = tensor.dim %0, %c1: tensor<?x42xf32>
+
+  // CHECK: %[[c42:.*]] = arith.constant 42 : index
+  // CHECK: call @some_use(%[[I]], %[[c42]])
+  call @some_use(%1, %2) : (index, index) -> ()
+
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @rank_reducing_empty_tensor_extract
+func.func @rank_reducing_empty_tensor_extract(%sz : index, %idx : index) -> tensor<2xf32> {
+  // CHECK: tensor.empty() : tensor<2xf32>
+  %a = tensor.empty(%sz) : tensor<?x2xf32>
+
+  // CHECK-NOT: extract
+  %r = tensor.extract_slice %a[%idx, 0] [1, 2] [1, 1] : tensor<?x2xf32> to tensor<2xf32>
+  return %r: tensor<2xf32>
+}
diff --git a/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir b/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir
--- a/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir
+++ b/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir
@@ -14,7 +14,7 @@
 // CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
 // CHECK-DAG: %[[c5:.+]] = arith.constant 5 : index
 // CHECK-DAG: %[[c7:.+]] = arith.constant 7 : index
-// CHECK-DAG: %[[init:.+]] = linalg.init_tensor [20, 11] :
+// CHECK-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32>
 // CHECK-DAG: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c20]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
 //     CHECK:   %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]]
 //     CHECK:   %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : 
@@ -28,7 +28,7 @@
 // FOREACH-DAG: %[[c3:.+]] = arith.constant 3 : index
 // FOREACH-DAG: %[[c5:.+]] = arith.constant 5 : index
 // FOREACH-DAG: %[[c7:.+]] = arith.constant 7 : index
-// FOREACH-DAG: %[[init:.+]] = linalg.init_tensor [20, 11] :
+// FOREACH-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32>
 //     FOREACH: %[[tile:.+]] = scf.foreach_thread (%[[iv:.+]]) in (%[[c20]]) shared_outs(%[[dest:.+]] = %[[init]])
 //     FOREACH:   %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]]
 //     FOREACH:   %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : 
@@ -54,7 +54,7 @@
 // CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
 // CHECK-DAG: %[[c5:.+]] = arith.constant 5 : index
 // CHECK-DAG: %[[c7:.+]] = arith.constant 7 : index
-//     CHECK: %[[init:.+]] = linalg.init_tensor [10, 5] :
+//     CHECK: %[[init:.+]] = tensor.empty() : tensor<10x5xf32>
 //     CHECK: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c10]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
 //     CHECK:   %[[inputIv:.+]] = affine.apply #[[$map0]](%[[iv]])
 //     CHECK:   %[[multiIndex:.+]]:3 = affine.delinearize_index %[[inputIv]] into (%[[c3]], %[[c5]], %[[c7]]
@@ -80,7 +80,7 @@
 // CHECK-DAG:   %[[c1:.+]] = arith.constant 1 : index
 // CHECK-DAG:   %[[c2:.+]] = arith.constant 2 : index
 // CHECK-DAG:   %[[c3:.+]] = arith.constant 3 : index
-//     CHECK:   %[[init:.+]] = linalg.init_tensor [%[[sz]], 5] : tensor<?x5xf32>
+//     CHECK:   %[[init:.+]] = tensor.empty(%[[sz]]) : tensor<?x5xf32>
 // CHECK-DAG:   %[[d1:.+]] = tensor.dim %arg0, %[[c1]] : tensor<3x?x?x11xf32>
 // CHECK-DAG:   %[[d2:.+]] = tensor.dim %arg0, %[[c2]] : tensor<3x?x?x11xf32>
 //     CHECK:   %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[sz]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
@@ -109,7 +109,7 @@
 // CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
 // CHECK-DAG: %[[c4:.+]] = arith.constant 4 : index
 // CHECK-DAG: %[[c11:.+]] = arith.constant 11 : index
-//     CHECK: %[[init:.+]] = linalg.init_tensor [%[[sz1]], %[[sz2]]] : tensor<?x?xf32>
+//     CHECK: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor<?x?xf32>
 // CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : 
 // CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : 
 // CHECK-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] :
@@ -133,7 +133,7 @@
 // FOREACH-DAG: %[[c3:.+]] = arith.constant 3 : index
 // FOREACH-DAG: %[[c4:.+]] = arith.constant 4 : index
 // FOREACH-DAG: %[[c11:.+]] = arith.constant 11 : index
-//     FOREACH:     %[[init:.+]] = linalg.init_tensor [%[[sz1]], %[[sz2]]] : tensor<?x?xf32>
+//     FOREACH:     %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor<?x?xf32>
 // FOREACH-DAG:     %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : 
 // FOREACH-DAG:     %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : 
 // FOREACH-DAG:     %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] :
@@ -170,7 +170,7 @@
   %collapsed = tensor.collapse_shape %input [[0, 1], [2]] : tensor<30x11x100xf32> into tensor<330x100xf32>
   %slice = tensor.extract_slice %collapsed [0, %offt] [330, %size] [1, 1] : tensor<330x100xf32> to tensor<330x?xf32>
   // CHECK-NOT: scf.for  
-  // CHECK: %[[init:.+]] = linalg.init_tensor [330, %[[arg2]]]
+  // CHECK: %[[init:.+]] = tensor.empty(%[[arg2]])
   // CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, %[[arg1]]] [30, 11, %[[arg2]]] [1, 1, 1]
   // CHECK: %[[c:.+]] = tensor.collapse_shape %[[e]] {{\[}}[0, 1], [2]]
   // CHECK: %[[res:.+]] = tensor.insert_slice %[[c]] into %[[init]]
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -514,3 +514,11 @@
     (tensor<f32>, tensor<4x5x6xf32>, tensor<1x2x3xindex>) -> tensor<1x2x1xf32>
   return
 }
+
+// -----
+
+func.func @empty_wrong_number_of_operands(%sz : index) {
+  // expected-error@+1 {{incorrect number of dynamic sizes, has 1, expected 2}}
+  %out = tensor.empty(%sz) : tensor<2x?x?x5xf32>
+  return
+}
diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir
--- a/mlir/test/Dialect/Tensor/ops.mlir
+++ b/mlir/test/Dialect/Tensor/ops.mlir
@@ -13,6 +13,14 @@
   return
 }
 
+// CHECK-LABEL: func @empty(
+//  CHECK-SAME:             %[[sz:.*]]: index
+func.func @empty(%sz: index) -> tensor<5x?x6xf32> {
+  // CHECK: tensor.empty(%[[sz]]) : tensor<5x?x6xf32>
+  %0 = tensor.empty(%sz) : tensor<5x?x6xf32>
+  return %0 : tensor<5x?x6xf32>
+}
+
 // CHECK-LABEL:   func @extract(
 // CHECK-SAME:                  %[[TENSOR:.*]]: tensor<?x?x?xf32>,
 // CHECK-SAME:                  %[[INDEX:.*]]: index) {
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \
-// RUN: -linalg-init-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \
+// RUN: -empty-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \
 // RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize -buffer-deallocation \
 // RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -39,6 +39,7 @@
 from mlir.dialects import func
 from mlir.dialects import linalg
 from mlir.dialects import sparse_tensor
+from mlir.dialects import tensor
 from mlir.dialects.linalg.opdsl import lang
 
 from . import mlir_pytaco_utils as utils
@@ -899,9 +900,9 @@
     if self.dst_format is None or self.dst_format.rank() == 0:
       # Initialize the dense tensor.
       ir_type = _mlir_type_from_taco_type(self.dst_dtype)
-      tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
+      empty = tensor.EmptyOp(self.dst_dims, ir_type).result
       zero = arith.ConstantOp(ir_type, 0.0)
-      return linalg.fill(zero, outs=[tensor])
+      return linalg.fill(zero, outs=[empty])
 
     # Initialize the sparse tensor.
     mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims,
@@ -1194,12 +1195,12 @@
     """
     if array.dtype != np.float32 and array.dtype != np.float64:
       raise ValueError(f"Expected floating point value type: {array.dtype}.")
-    tensor = Tensor(
+    t = Tensor(
         array.shape,
         dtype=_nptype_to_taco_type(array.dtype.type),
         is_dense=True)
-    tensor._dense_storage = np.copy(array)
-    return tensor
+    t._dense_storage = np.copy(array)
+    return t
 
   @staticmethod
   def from_coo(
@@ -1234,9 +1235,9 @@
     # The size of each dimension is one more that such a maximum coordinate
     # value.
     shape = [c + 1 for c in max_coordinate]
-    tensor = Tensor(shape, fmt, dtype=dtype)
-    tensor._coords = coordinates
-    tensor._values = values
+    t = Tensor(shape, fmt, dtype=dtype)
+    t._coords = coordinates
+    t._values = values
 
     return tensor
 
@@ -1261,10 +1262,10 @@
     sparse_tensor, shape = utils.create_sparse_tensor(filename,
                                                       fmt.format_pack.formats,
                                                       _dtype_to_mlir_str(dtype))
-    tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
-    tensor._set_packed_sparse_tensor(sparse_tensor)
+    t = Tensor(shape.tolist(), fmt, dtype=dtype)
+    t._set_packed_sparse_tensor(sparse_tensor)
 
-    return tensor
+    return t
 
   def to_file(self, filename: str) -> None:
     """Output the tensor value to a file.
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -6,7 +6,7 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
-  %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
   %gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -16,7 +16,7 @@
 //      CHECK: func.func @gemm_fill_fusion(
 // CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor
+//      CHECK:   %[[INIT:.+]] = tensor.empty
 //      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] =
 // CHECK-SAME:       iter_args(%[[ITERARG0:.+]] = %[[INIT]])
 //      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] =
@@ -41,7 +41,7 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
-  %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
   %gemm = linalg.matmul
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -61,7 +61,7 @@
 // CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>,
 // CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>)
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor
+//      CHECK:   %[[INIT:.+]] = tensor.empty
 //      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] =
 // CHECK-SAME:       iter_args(%[[ITERARG0:.+]] = %[[INIT]])
 //      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] =
@@ -90,12 +90,12 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %lhs0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %rhs0, %c1 : tensor<?x?xf32>
-  %init0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %gemm0 = linalg.matmul
       ins(%lhs0, %rhs0 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%fill0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %d2 = tensor.dim %rhs1, %c1 : tensor<?x?xf32>
-  %init1 = linalg.init_tensor [%d0, %d2] : tensor<?x?xf32>
+  %init1 = tensor.empty(%d0, %d2) : tensor<?x?xf32>
   %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %gemm1 = linalg.matmul  {__internal_linalg_transform__ = "gemm_fusion"}
       ins(%gemm0, %rhs1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%fill1 : tensor<?x?xf32>) -> tensor<?x?xf32>
@@ -109,9 +109,9 @@
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[LHS0]], %[[C0]]
 //  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[RHS0]], %[[C1]]
-//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty(%[[D0]], %[[D1]])
 //  CHECK-DAG:   %[[D2:.+]] = tensor.dim %[[RHS1]], %[[C1]]
-//      CHECK:   %[[INIT1:.+]] = linalg.init_tensor [%[[D0]], %[[D2]]]
+//      CHECK:   %[[INIT1:.+]] = tensor.empty(%[[D0]], %[[D2]])
 //      CHECK:   scf.for %[[IV:[a-zA-Z0-9]+]] =
 // CHECK-SAME:       iter_args(%[[ITERARG:.+]] = %[[INIT1]])
 //  CHECK-DAG:     %[[LHS0_TILE:.+]] = tensor.extract_slice %[[LHS0]][%[[IV]], 0]
@@ -140,12 +140,12 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
-  %init0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %init0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %gemm = linalg.matmul
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
       outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
-  %init1 = linalg.init_tensor [%d1, %d0] : tensor<?x?xf32>
+  %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
   %transpose = linalg.generic {
       __internal_linalg_transform__ = "fusion",
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
@@ -163,8 +163,8 @@
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 //  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C1]]
-//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
+//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty(%[[D0]], %[[D1]])
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
 //      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] =
 // CHECK-SAME:       iter_args(%[[ITERARG0:.+]] = %[[INIT1]])
 //      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] =
@@ -192,7 +192,7 @@
   %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
   %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
   %cst = arith.constant 0.0 : f32
-  %0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+  %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %2 = linalg.matmul
       ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -211,7 +211,7 @@
 //      CHECK: func.func @interchange_matmul_fusion(
 // CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
-//      CHECK:   %[[INIT:.+]] = linalg.init_tensor
+//      CHECK:   %[[INIT:.+]] = tensor.empty
 //      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] =
 // CHECK-SAME:       iter_args(%[[ITERARG0:.+]] = %[[INIT]])
 //      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] =
@@ -243,7 +243,7 @@
     outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %3 = tensor.dim %2, %c0 : tensor<?x?xf32>
   %4 = tensor.dim %2, %c1 : tensor<?x?xf32>
-  %5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
+  %5 = tensor.empty(%3, %4) : tensor<?x?xf32>
   %6 = linalg.generic
     {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                       affine_map<(d0, d1) -> (d0, d1)>,
@@ -302,7 +302,7 @@
     outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %3 = tensor.dim %2, %c0 : tensor<?x?xf32>
   %4 = tensor.dim %2, %c1 : tensor<?x?xf32>
-  %5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
+  %5 = tensor.empty(%3, %4) : tensor<?x?xf32>
   %6 = linalg.generic
     {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                       affine_map<(d0, d1) -> (d1, d0)>,
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -87,8 +87,8 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
 #map2 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
 func.func @multi_result(%arg0 : tensor<128x200x300xf32>) -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
-  %init0 = linalg.init_tensor [128, 300, 200] : tensor<128x300x200xf32>
-  %init1 = linalg.init_tensor [300, 128, 200] : tensor<300x128x200xf32>
+  %init0 = tensor.empty() : tensor<128x300x200xf32>
+  %init1 = tensor.empty() : tensor<300x128x200xf32>
   %0:2 = linalg.generic {
       indexing_maps = [#map0, #map1, #map2],
       iterator_types = ["parallel", "parallel", "parallel"]}
@@ -108,8 +108,8 @@
 //  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
 //  CHECK-DAG:   %[[C128:.+]] = arith.constant 128 : index
 //  CHECK-DAG:   %[[C300:.+]] = arith.constant 300 : index
-//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [128, 300, 200]
-//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [300, 128, 200]
+//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty()
+//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty()
 //      CHECK:   %[[OUTER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[C128]] step %[[C10]]
 // CHECK-SAME:       iter_args(%[[ARG1:[a-zA-Z0-9]+]] = %[[INIT0]], %[[ARG2:[a-zA-Z0-9]+]] = %[[INIT1]])
 //      CHECK:     %[[TS_Y:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[C128]]]
diff --git a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
--- a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
@@ -136,8 +136,8 @@
     // Create the destination tensor using the above values.
     Type elementType = op.getSourceType().getElementType();
     SmallVector<OpFoldResult> outputShape = getAsOpFoldResult(reifiedShapes[0]);
-    Value dest = rewriter.create<linalg::InitTensorOp>(
-        op->getLoc(), outputShape, elementType);
+    Value dest = rewriter.create<tensor::EmptyOp>(op->getLoc(), outputShape,
+                                                  elementType);
 
     // Calculate the parameters for the tile loop nest.
     FailureOr<tensor::ExtractSliceFromCollapseHelper> params =
diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
--- a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
@@ -4,6 +4,7 @@
 from mlir.dialects import builtin
 from mlir.dialects import func
 from mlir.dialects import linalg
+from mlir.dialects import tensor
 
 from mlir.dialects.linalg.opdsl.lang import *
 
@@ -50,7 +51,7 @@
     # CHECK-LABEL: func @test_matmul_mono
     # CHECK-SAME:  %[[A:.+]]: tensor<4x16xf32>
     # CHECK-SAME:  %[[B:.+]]: tensor<16x8xf32>
-    # CHECK: %[[INITC:.+]] = linalg.init_tensor [4, 8] : tensor<4x8xf32>
+    # CHECK: %[[INITC:.+]] = tensor.empty() : tensor<4x8xf32>
     # CHECK: linalg.generic
     # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
     # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
@@ -59,7 +60,7 @@
     @func.FuncOp.from_py_func(
         RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
     def test_matmul_mono(lhs, rhs):
-      init_result = linalg.InitTensorOp([4, 8], f32)
+      init_result = tensor.EmptyOp([4, 8], f32)
       return matmul_mono(lhs, rhs, outs=[init_result.result])
 
     # CHECK-LABEL: @test_i8i8i32_matmul
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -1,6 +1,6 @@
 # RUN: %PYTHON %s | FileCheck %s
 
-from mlir.dialects import arith, builtin, func, linalg
+from mlir.dialects import arith, builtin, func, linalg, tensor
 from mlir.dialects.linalg.opdsl.lang import *
 from mlir.ir import *
 
@@ -11,46 +11,6 @@
   return f
 
 
-# CHECK-LABEL: TEST: testInitTensor
-@run
-def testInitTensor():
-  with Context() as ctx, Location.unknown():
-    module = Module.create()
-    f32 = F32Type.get()
-    with InsertionPoint(module.body):
-      # CHECK-LABEL: func @static_sizes
-      # CHECK: %0 = linalg.init_tensor [3, 4] : tensor<3x4xf32>
-      @func.FuncOp.from_py_func()
-      def static_sizes():
-        return linalg.InitTensorOp([3, 4], f32)
-
-      # CHECK-LABEL: func @dynamic_sizes
-      # CHECK: %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
-      @func.FuncOp.from_py_func(IndexType.get(), IndexType.get())
-      def dynamic_sizes(d0, d1):
-        return linalg.InitTensorOp([d0, d1], f32)
-
-      # CHECK-LABEL: func @zero_d
-      # CHECK: %0 = linalg.init_tensor [] : tensor<f32>
-      @func.FuncOp.from_py_func()
-      def zero_d():
-        return linalg.InitTensorOp([], f32)
-
-  print(module)
-
-
-# CHECK-LABEL: TEST: testInitTensorStaticSizesAttribute
-@run
-def testInitTensorStaticSizesAttribute():
-  with Context() as ctx, Location.unknown():
-    module = Module.create()
-    f32 = F32Type.get()
-    with InsertionPoint(module.body):
-      op = linalg.InitTensorOp([3, 4], f32)
-      # CHECK: [3, 4]
-      print(op.attributes["static_sizes"])
-
-
 # CHECK-LABEL: TEST: testFill
 @run
 def testFill():
@@ -92,7 +52,7 @@
       @func.FuncOp.from_py_func(
           RankedTensorType.get((4, 8), f32), RankedTensorType.get((4, 8), f32))
       def named_form(lhs, rhs):
-        init_result = linalg.InitTensorOp([4, 8], f32)
+        init_result = tensor.EmptyOp([4, 8], f32)
         # Check for the named form with custom format
         #      CHECK: linalg.elemwise_unary
         # CHECK-SAME:    cast = #linalg.type_fn<cast_signed>
@@ -127,7 +87,7 @@
           RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
                                                                    f32))
       def named_form(lhs, rhs):
-        init_result = linalg.InitTensorOp([4, 8], f32)
+        init_result = tensor.EmptyOp([4, 8], f32)
         #      CHECK: "linalg.matmul"(%{{.*}})
         # CHECK-NEXT:  ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
         # CHECK-NEXT:    arith.mulf{{.*}} (f32, f32) -> f32
@@ -153,7 +113,7 @@
           RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
                                                                    f32))
       def generic_form(lhs, rhs):
-        init_result = linalg.InitTensorOp([4, 8], f32)
+        init_result = tensor.EmptyOp([4, 8], f32)
         # CHECK: linalg.generic
         return linalg.matmul(
             lhs, rhs, outs=[init_result.result], emit_generic=True)
@@ -178,8 +138,8 @@
         lhs = linalg.fill(one, outs=[arg0])
         # CHECK: %[[RHS:.*]] = linalg.fill
         rhs = linalg.fill(one, outs=[arg1])
-        # CHECK: %[[INIT:.*]] = linalg.init_tensor
-        init = linalg.InitTensorOp([4, 8], f32)
+        # CHECK: %[[INIT:.*]] = tensor.empty
+        init = tensor.EmptyOp([4, 8], f32)
         # CHECK: linalg.matmul
         # CHECK: ins(%[[LHS]], %[[RHS]]
         # CHECK: outs(%[[INIT]]
diff --git a/mlir/test/python/dialects/tensor.py b/mlir/test/python/dialects/tensor.py
--- a/mlir/test/python/dialects/tensor.py
+++ b/mlir/test/python/dialects/tensor.py
@@ -37,3 +37,37 @@
         return [d0.result, d1.result]
 
     print(module)
+
+
+# CHECK-LABEL: TEST: testEmptyOp
+@run
+def testEmptyOp():
+  with Context() as ctx, Location.unknown():
+    module = Module.create()
+    f32 = F32Type.get()
+    with InsertionPoint(module.body):
+      # CHECK-LABEL: func @static_sizes
+      # CHECK: %0 = tensor.empty() : tensor<3x4xf32>
+      @func.FuncOp.from_py_func()
+      def static_sizes():
+        return tensor.EmptyOp([3, 4], f32)
+
+      # CHECK-LABEL: func @dynamic_sizes
+      # CHECK: %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
+      @func.FuncOp.from_py_func(IndexType.get(), IndexType.get())
+      def dynamic_sizes(d0, d1):
+        return tensor.EmptyOp([d0, d1], f32)
+
+      # CHECK-LABEL: func @mixed_static_dynamic_sizes
+      # CHECK: %0 = tensor.empty(%arg0) : tensor<?x4xf32>
+      @func.FuncOp.from_py_func(IndexType.get())
+      def mixed_static_dynamic_sizes(d0):
+        return tensor.EmptyOp([d0, 4], f32)
+
+      # CHECK-LABEL: func @zero_d
+      # CHECK: %0 = tensor.empty() : tensor<f32>
+      @func.FuncOp.from_py_func()
+      def zero_d():
+        return tensor.EmptyOp([], f32)
+
+  print(module)
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5032,6 +5032,7 @@
     hdrs = ["include/mlir/Dialect/Tensor/IR/Tensor.h"],
     includes = ["include"],
     deps = [
+        ":AffineDialect",
         ":ArithDialect",
         ":ArithUtils",
         ":CastOpInterfaces",