diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h --- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h @@ -12,6 +12,7 @@ #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Interfaces/CopyOpInterface.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" //===----------------------------------------------------------------------===// // Bufferization Dialect diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td @@ -25,7 +25,9 @@ found in [bufferization](/docs/Bufferization/) and [buffer deallocation](/docs/BufferDeallocationInternals/). }]; - let dependentDialects = ["memref::MemRefDialect", "tensor::TensorDialect"]; + let dependentDialects = [ + "AffineDialect", "memref::MemRefDialect", "tensor::TensorDialect" + ]; let extraClassDeclaration = [{ /// An attribute that can override writability of buffers of tensor function diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -12,12 +12,128 @@ include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizationBase.td" +include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/CopyOpInterface.td" class Bufferization_Op traits = []> : Op; +//===----------------------------------------------------------------------===// +// AllocTensorOp +//===----------------------------------------------------------------------===// + +def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", + [BufferizableOpInterface, + DeclareOpInterfaceMethods]> { + let summary = "buffer allocation in tensor land"; + + let description = [{ + `bufferization.alloc_tensor` is an operation that bufferizes to a buffer + allocation of a given shape. The shape could be dynamic or static. + Reading from the result of an `alloc_tensor` op yields an undefined value. + + `alloc_tensor` is a helper op for bufferization. It marks the beginning of + a new tensor SSA use-def chain and is used to control in-place bufferization + decisions during One-Shot Bufferize. + }]; + + let arguments = + (ins Variadic:$sizes, I64ArrayAttr:$static_sizes); + + let results = (outs AnyTensor:$result); + + let assemblyFormat = [{ + custom($sizes, $static_sizes) attr-dict + `:` type($result) + }]; + + let extraClassDeclaration = [{ + LogicalResult bufferize(RewriterBase &rewriter, BufferizationState &state); + + bool isMemoryWrite(OpResult opResult, const AnalysisState &state) const { + // AllocTensorOps allocate but do not write. + return false; + } + + static StringRef getStaticSizesAttrName() { + return "static_sizes"; + } + + RankedTensorType getType() { + return getResult().getType().cast(); + } + + // Infer the shape of the result tensor given the static shapes + // and element type of the result tensor. 
+ static Type inferResultType(ArrayRef staticSizes, Type elementType, + Attribute encoding = {}); + + // Return true if the size of the tensor is dynamic at `idx` + bool isDynamicSize(unsigned idx) { + APInt v = *(static_sizes().getAsValueRange().begin() + idx); + return ShapedType::isDynamic(v.getSExtValue()); + } + + // Assert that the size of the result tensor is static at `idx` + // and return the shape. + int64_t getStaticSize(unsigned idx) { + assert(!isDynamicSize(idx) && "expected static size"); + APInt v = *(static_sizes(). + template getAsValueRange().begin() + idx); + return v.getSExtValue(); + } + + // Return the argument position that contains the dynamic size of + // the tensor at dimension `idx`. Asserts that the shape is + // dynamic at that `idx`. + unsigned getIndexOfDynamicSize(unsigned idx) { + assert(isDynamicSize(idx) && "expected dynamic size"); + return std::count_if( + static_sizes().getValue().begin(), + static_sizes().getValue().begin() + idx, + [&](Attribute attr) { + return ShapedType::isDynamic(attr.cast().getInt()); + }); + } + + // Return both static and dynamic sizes as a list of `OpFoldResult`. + SmallVector getMixedSizes(); + + // Return the Value of the dynamic size of the tensor at dimension + // `idx`. Asserts that the shape is dynamic at that `idx. + Value getDynamicSize(unsigned idx) { + return getOperand(getIndexOfDynamicSize(idx)); + } + }]; + + let builders = [ + OpBuilder<(ins "ValueRange":$shape, + "ArrayRef":$staticShape, "Type":$elementType), + [{ + build($_builder, $_state, + AllocTensorOp::inferResultType(staticShape, elementType), + shape, $_builder.getI64ArrayAttr(staticShape)); + }]>, + OpBuilder<(ins "ValueRange":$shape, "Type":$elementType), + [{ + SmallVector staticShape( + shape.size(), ShapedType::kDynamicSize); + build($_builder, $_state, shape, staticShape, elementType); + }]>, + OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType), + [{ + build($_builder, $_state, ValueRange{}, staticShape, elementType); + }]>, + OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let hasCanonicalizer = 1; + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h copy from mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h copy to mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h @@ -1,4 +1,4 @@ -//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===// +//===- AllocTensorElimination.h - alloc_tensor op elimination -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,17 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H -#define MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H +#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H +#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" namespace mlir { -class DialectRegistry; +namespace bufferization { -namespace linalg { - -/// A function that matches anchor OpOperands for InitTensorOp elimination. +/// A function that matches anchor OpOperands for AllocTensorOp elimination. /// If an OpOperand is matched, the function should populate the SmallVector /// with all values that are needed during `RewriteFn` to produce the /// replacement value. @@ -25,28 +23,26 @@ /// A function that rewrites matched anchors. using RewriteFn = std::function; -/// Try to eliminate InitTensorOps inside `op`. +/// Try to eliminate AllocTensorOps inside `op`. /// -/// * `rewriteFunc` generates the replacement for the InitTensorOp. -/// * Only InitTensorOps that are anchored on a matching OpOperand as per +/// * `rewriteFunc` generates the replacement for the AllocTensorOp. +/// * Only AllocTensorOps that are anchored on a matching OpOperand as per /// `anchorMatchFunc` are considered. "Anchored" means that there is a path /// on the reverse SSA use-def chain, starting from the OpOperand and always /// following the aliasing OpOperand, that eventually ends at a single -/// InitTensorOp. -LogicalResult eliminateInitTensors(RewriterBase &rewriter, Operation *op, - bufferization::AnalysisState &state, - AnchorMatchFn anchorMatchFunc, - RewriteFn rewriteFunc); +/// AllocTensorOp. +LogicalResult eliminateAllocTensors(RewriterBase &rewriter, Operation *op, + bufferization::AnalysisState &state, + AnchorMatchFn anchorMatchFunc, + RewriteFn rewriteFunc); -/// Try to eliminate InitTensorOps inside `op` that are anchored on an +/// Try to eliminate AllocTensorOps inside `op` that are anchored on an /// InsertSliceOp, i.e., if it is eventually inserted into another tensor /// (and some other conditions are met). -LogicalResult insertSliceAnchoredInitTensorEliminationStep( +LogicalResult insertSliceAnchoredAllocTensorEliminationStep( RewriterBase &rewriter, Operation *op, bufferization::AnalysisState &state); -void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); - -} // namespace linalg +} // namespace bufferization } // namespace mlir -#endif // MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H +#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -64,6 +64,13 @@ std::unique_ptr createPromoteBuffersToStackPass(std::function isSmallAlloc); +/// Create a pass that tries to eliminate alloc_tensor ops that are anchored on +/// insert_slice ops. +std::unique_ptr createAllocTensorEliminationPass(); + +/// Create a pass that bufferizes ops from the bufferization dialect. 
+std::unique_ptr createBufferizationBufferizePass(); + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -149,6 +149,11 @@ let constructor = "mlir::bufferization::createFinalizingBufferizePass()"; } +def BufferizationBufferize : Pass<"bufferization-bufferize", "func::FuncOp"> { + let summary = "Bufferize the `bufferization` dialect"; + let constructor = "mlir::bufferization::createBufferizationBufferizePass()"; +} + def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { let summary = "One-Shot Bufferize"; let description = [{ @@ -309,4 +314,16 @@ ]; } +def AllocTensorElimination : Pass<"eliminate-alloc-tensors"> { + let summary = "Try to eliminate all alloc_tensor ops."; + let description = [{ + This pass tries to eliminate all insert_slice op-anchored alloc_tensor ops. + I.e., when a value that is equivalent to an alloc_tensor op is inserted into + another tensor, this pass tries to rewrite the IR in such a way that the + destination tensor of the insert_slice op is used directly instead of the + alloc_tensor result. + }]; + let constructor = "mlir::bufferization::createAllocTensorEliminationPass()"; +} + #endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -27,11 +27,16 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor", [NoSideEffect, DeclareOpInterfaceMethods]> { - let summary = "operation to define a tensor of particular value"; + let summary = "operation to define a tensor of particular shape"; let description = [{ - `linalg.init_tensor` is an operation that materializes a tensor of - a given shape. The shape could be dynamic or static. + `linalg.init_tensor` is an operation that defines a tensor of a particular + shape. The shape could be dynamic or static. The contents of the tensor are + unspecified and the only purpose of the op result is to materialize the + specified shape in IR and make it available to other transformations. + + Note: This op can be lowered to a `bufferization.alloc_tensor`, at which + point it turns into an explicit buffer allocation. }]; let arguments = diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -62,9 +62,8 @@ std::unique_ptr> createConvertLinalgToAffineLoopsPass(); -/// Create a pass that tries to eliminate init_tensor ops that are anchored on -/// insert_slice ops. -std::unique_ptr createLinalgInitTensorEliminationPass(); +/// Create a pass that rewrites init_tensor to alloc_tensor. +std::unique_ptr createLinalgInitTensorToAllocTensorPass(); /// Create a pass to convert Linalg operations which work on tensors to use /// buffers instead. 
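
A minimal sketch of the rewrite performed by the new linalg-init-tensor-to-alloc-tensor pass declared above (illustrative IR only; the %sz value and surrounding context are assumed names, and the op syntax follows the tests updated later in this patch): the pass replaces each init_tensor op one-for-one with an alloc_tensor op, preserving the mixed sizes and the element type.

  // Before the pass:
  %0 = linalg.init_tensor [%sz] : tensor<?xf32>

  // After -linalg-init-tensor-to-alloc-tensor:
  %0 = bufferization.alloc_tensor [%sz] : tensor<?xf32>
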
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -24,16 +24,14 @@
   let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"];
 }
-def LinalgInitTensorElimination : Pass<"linalg-eliminate-init-tensors"> {
-  let summary = "Try to eliminate all init_tensor ops.";
+def LinalgInitTensorToAllocTensor : Pass<"linalg-init-tensor-to-alloc-tensor"> {
+  let summary = "Replace all init_tensor ops by alloc_tensor ops.";
   let description = [{
-    This pass tries to eliminate all insert_slice op-anchored init_tensor ops.
-    I.e., when a value that is aliasing with an init_tensor op is inserted into
-    another tensor, this pass tries to rewrite the IR in such a way that the
-    destination tensor of the insert_slice op is used directly instead of the
-    init_tensor result.
+    init_tensor ops return a tensor of unspecified contents whose only purpose
+    is to carry the tensor shape. This pass converts such ops to
+    bufferization.alloc_tensor ops, which bufferize to buffer allocations.
   }];
-  let constructor = "mlir::createLinalgInitTensorEliminationPass()";
+  let constructor = "mlir::createLinalgInitTensorToAllocTensorPass()";
 }
 def LinalgFoldUnitExtentDims : Pass<"linalg-fold-unit-extent-dims", ""> {
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h
@@ -9,43 +9,11 @@
 #ifndef MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H
 #define MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H
-#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
-
 namespace mlir {
 class DialectRegistry;
 namespace linalg {
-
-/// A function that matches anchor OpOperands for InitTensorOp elimination.
-/// If an OpOperand is matched, the function should populate the SmallVector
-/// with all values that are needed during `RewriteFn` to produce the
-/// replacement value.
-using AnchorMatchFn = std::function<bool(OpOperand &, SmallVector<Value> &)>;
-
-/// A function that rewrites matched anchors.
-using RewriteFn = std::function<Value(OpBuilder &, Location, OpOperand &)>;
-
-/// Try to eliminate InitTensorOps inside `op`.
-///
-/// * `rewriteFunc` generates the replacement for the InitTensorOp.
-/// * Only InitTensorOps that are anchored on a matching OpOperand as per
-///   `anchorMatchFunc` are considered. "Anchored" means that there is a path
-///   on the reverse SSA use-def chain, starting from the OpOperand and always
-///   following the aliasing OpOperand, that eventually ends at a single
-///   InitTensorOp.
-LogicalResult eliminateInitTensors(RewriterBase &rewriter, Operation *op,
-                                   bufferization::AnalysisState &state,
-                                   AnchorMatchFn anchorMatchFunc,
-                                   RewriteFn rewriteFunc);
-
-/// Try to eliminate InitTensorOps inside `op` that are anchored on an
-/// InsertSliceOp, i.e., if it is eventually inserted into another tensor
-/// (and some other conditions are met).
-LogicalResult insertSliceAnchoredInitTensorEliminationStep( - RewriterBase &rewriter, Operation *op, bufferization::AnalysisState &state); - void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); - } // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" @@ -127,6 +128,167 @@ return success(); } +//===----------------------------------------------------------------------===// +// AllocTensorOp +//===----------------------------------------------------------------------===// + +LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter, + BufferizationState &state) { + // Nothing to do for dead AllocTensorOps. 
+  if (getOperation()->getUses().empty())
+    return success();
+
+  FailureOr<Value> alloc = state.createAlloc(rewriter, getLoc(), getResult());
+  if (failed(alloc))
+    return failure();
+  replaceOpWithBufferizedValues(rewriter, getOperation(), *alloc);
+  return success();
+}
+
+void AllocTensorOp::build(OpBuilder &b, OperationState &result,
+                          ArrayRef<OpFoldResult> sizes, Type elementType,
+                          ArrayRef<NamedAttribute> attrs) {
+  SmallVector<Value> dynamicSizes;
+  SmallVector<int64_t> staticSizes;
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  auto resultType = RankedTensorType::get(staticSizes, elementType);
+  build(b, result, resultType, dynamicSizes, b.getI64ArrayAttr(staticSizes));
+  result.addAttributes(attrs);
+}
+
+LogicalResult AllocTensorOp::verify() {
+  RankedTensorType resultType = getType();
+  SmallVector<int64_t, 4> staticSizes = llvm::to_vector<4>(llvm::map_range(
+      static_sizes().cast<ArrayAttr>(),
+      [](Attribute a) -> int64_t { return a.cast<IntegerAttr>().getInt(); }));
+
+  if (failed(verifyListOfOperandsOrIntegers(
+          *this, "sizes", resultType.getRank(), static_sizes(), sizes(),
+          ShapedType::isDynamic)))
+    return failure();
+
+  if (static_sizes().size() != static_cast<unsigned>(resultType.getRank()))
+    return emitError("expected ") << resultType.getRank() << " sizes values";
+
+  Type expectedType = AllocTensorOp::inferResultType(
+      staticSizes, resultType.getElementType(), resultType.getEncoding());
+  if (resultType != expectedType) {
+    return emitError("specified type ")
+           << resultType << " does not match the inferred type "
+           << expectedType;
+  }
+  return success();
+}
+
+Type AllocTensorOp::inferResultType(ArrayRef<int64_t> staticSizes,
+                                    Type elementType, Attribute encoding) {
+  return RankedTensorType::get(staticSizes, elementType, encoding);
+}
+
+SmallVector<OpFoldResult> AllocTensorOp::getMixedSizes() {
+  SmallVector<OpFoldResult> mixedSizes;
+  mixedSizes.reserve(getType().getRank());
+  unsigned dynamicValIndex = 0;
+  for (Attribute attr : static_sizes()) {
+    auto intAttr = attr.cast<IntegerAttr>();
+    if (!ShapedType::isDynamic(intAttr.getInt())) {
+      mixedSizes.push_back(intAttr);
+      continue;
+    }
+    mixedSizes.push_back(sizes()[dynamicValIndex++]);
+  }
+  return mixedSizes;
+}
+
+namespace {
+/// Change the type of the result of a `bufferization.alloc_tensor` by making
+/// the result type statically sized along dimensions that were defined as
+/// dynamic in the original operation, but whose size was defined using a
+/// `constant` op. For example:
+///
+///  %c5 = arith.constant 5: index
+///  %0 = bufferization.alloc_tensor [%arg0, %c5] : tensor<?x?xf32>
+///
+///  to
+///
+///  %0 = bufferization.alloc_tensor [%arg0, 5] : tensor<?x5xf32>
+struct ReplaceStaticShapeDims : OpRewritePattern<AllocTensorOp> {
+  using OpRewritePattern<AllocTensorOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AllocTensorOp op,
+                                PatternRewriter &rewriter) const override {
+    SmallVector<Value> dynamicSizes;
+    SmallVector<int64_t> staticSizes;
+    for (unsigned i = 0, e = op.getType().getRank(); i != e; ++i) {
+      // If the size is already static, nothing to do.
+      if (!op.isDynamicSize(i)) {
+        staticSizes.push_back(op.getStaticSize(i));
+        continue;
+      }
+
+      // If the size is dynamic but defined using a `constant` op, get the
+      // constant value to find the static size to use.
+      unsigned operandNum = op.getIndexOfDynamicSize(i);
+      Value sizeOperand = op.getOperand(operandNum);
+      if (auto constantIndexOp =
+              sizeOperand.getDefiningOp<arith::ConstantIndexOp>()) {
+        staticSizes.push_back(constantIndexOp.value());
+        continue;
+      }
+
+      // Fallback case. Keep the size dynamic.
+ dynamicSizes.push_back(sizeOperand); + staticSizes.push_back(ShapedType::kDynamicSize); + } + RankedTensorType newType = + RankedTensorType::get(staticSizes, op.getType().getElementType()); + if (newType == op.getType()) + return failure(); + auto newOp = + rewriter.create(op.getLoc(), newType, dynamicSizes, + rewriter.getI64ArrayAttr(staticSizes)); + rewriter.replaceOpWithNewOp(op, op.getType(), newOp); + return success(); + } +}; + +struct FoldDimOfAllocTensorOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::DimOp dimOp, + PatternRewriter &rewriter) const override { + Optional maybeConstantIndex = dimOp.getConstantIndex(); + auto allocTensorOp = dimOp.source().getDefiningOp(); + if (!allocTensorOp || !maybeConstantIndex) + return failure(); + if (!allocTensorOp.isDynamicSize(*maybeConstantIndex)) + return failure(); + rewriter.replaceOp(dimOp, + allocTensorOp.getDynamicSize(*maybeConstantIndex)); + return success(); + } +}; +} // namespace + +void AllocTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *ctx) { + results.add(ctx); +} + +LogicalResult AllocTensorOp::reifyResultShapes( + OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) { + auto shapes = llvm::to_vector<4>(llvm::map_range( + llvm::seq(0, getType().getRank()), [&](int64_t dim) -> Value { + if (isDynamicSize(dim)) + return getDynamicSize(dim); + return builder.create(getLoc(), + getStaticSize(dim)); + })); + reifiedReturnShapes.emplace_back(std::move(shapes)); + return success(); +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt @@ -12,6 +12,7 @@ MLIRBufferizationOpsIncGen LINK_LIBS PUBLIC + MLIRAffine MLIRDialect MLIRFunc MLIRIR diff --git a/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp @@ -0,0 +1,272 @@ +//===- AllocTensorElimination.cpp - alloc_tensor op elimination -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" + +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h" +#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" +#include "mlir/Dialect/Bufferization/Transforms/Passes.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/Dominance.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; +using namespace mlir::bufferization; + +/// Return true if all `neededValues` are in scope at the given +/// `insertionPoint`. 
+static bool +neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, + Operation *insertionPoint, + const SmallVector &neededValues) { + for (Value val : neededValues) { + if (auto bbArg = val.dyn_cast()) { + Block *owner = bbArg.getOwner(); + if (!owner->findAncestorOpInBlock(*insertionPoint)) + return false; + } else { + auto opResult = val.cast(); + if (!domInfo.dominates(opResult.getOwner(), insertionPoint)) + return false; + } + } + return true; +} + +/// Return true if the given `insertionPoint` dominates all uses of +/// `allocTensorOp`. +static bool insertionPointDominatesUses(const DominanceInfo &domInfo, + Operation *insertionPoint, + Operation *allocTensorOp) { + for (Operation *user : allocTensorOp->getUsers()) + if (!domInfo.dominates(insertionPoint, user)) + return false; + return true; +} + +/// Find a valid insertion point for a replacement of `allocTensorOp`, assuming +/// that the replacement may use any value from `neededValues`. +static Operation * +findValidInsertionPoint(Operation *allocTensorOp, + const SmallVector &neededValues) { + DominanceInfo domInfo; + + // Gather all possible insertion points: the location of `allocTensorOp` and + // right after the definition of each value in `neededValues`. + SmallVector insertionPointCandidates; + insertionPointCandidates.push_back(allocTensorOp); + for (Value val : neededValues) { + // Note: The anchor op is using all of `neededValues`, so: + // * in case of a block argument: There must be at least one op in the block + // (the anchor op or one of its parents). + // * in case of an OpResult: There must be at least one op right after the + // defining op (the anchor op or one of its + // parents). + if (auto bbArg = val.dyn_cast()) { + insertionPointCandidates.push_back( + &bbArg.getOwner()->getOperations().front()); + } else { + insertionPointCandidates.push_back(val.getDefiningOp()->getNextNode()); + } + } + + // Select first matching insertion point. + for (Operation *insertionPoint : insertionPointCandidates) { + // Check if all needed values are in scope. + if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, + neededValues)) + continue; + // Check if the insertion point is before all uses. + if (!insertionPointDominatesUses(domInfo, insertionPoint, allocTensorOp)) + continue; + return insertionPoint; + } + + // No suitable insertion point was found. + return nullptr; +} + +/// Try to eliminate AllocTensorOps inside `op`. An AllocTensorOp is replaced +/// with the result of `rewriteFunc` if it is anchored on a matching +/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def +/// chain, starting from the OpOperand and always following the aliasing +/// OpOperand, that eventually ends at a single AllocTensorOp. +LogicalResult mlir::bufferization::eliminateAllocTensors( + RewriterBase &rewriter, Operation *op, AnalysisState &state, + AnchorMatchFn anchorMatchFunc, RewriteFn rewriteFunc) { + OpBuilder::InsertionGuard g(rewriter); + + WalkResult status = op->walk([&](Operation *op) { + for (OpOperand &operand : op->getOpOperands()) { + // Skip operands that do not bufferize inplace. + if (!state.isInPlace(operand)) + continue; + // All values that are needed to create the replacement op. + SmallVector neededValues; + // Is this a matching OpOperand? + if (!anchorMatchFunc(operand, neededValues)) + continue; + SetVector maybeAllocTensor = + state.findValueInReverseUseDefChain(operand.get(), [&](Value val) { + // Continue traversal until this function returns true. 
+ OpResult opResult = val.dyn_cast(); + if (!opResult) + return true; + SmallVector opOperands = + state.getAliasingOpOperand(opResult); + if (!llvm::all_of(opOperands, [&](OpOperand *operand) { + return state.isInPlace(*operand); + })) + return true; + // Only equivalent tensors are supported at the moment. + // TODO: Support cases such as extract_slice(alloc_tensor) + return !llvm::all_of(opOperands, [&](OpOperand *operand) { + return state.areEquivalentBufferizedValues(operand->get(), + opResult); + }); + }); + + // Replace only if the reverse use-def chain ends at exactly one + // AllocTensorOp. + if (maybeAllocTensor.size() != 1 || + !maybeAllocTensor.front().getDefiningOp()) + return WalkResult::skip(); + Value allocTensor = maybeAllocTensor.front(); + + // Find a suitable insertion point. + Operation *insertionPoint = + findValidInsertionPoint(allocTensor.getDefiningOp(), neededValues); + if (!insertionPoint) + continue; + + // Create a replacement for the AllocTensorOp. + rewriter.setInsertionPoint(insertionPoint); + Value replacement = rewriteFunc(rewriter, allocTensor.getLoc(), operand); + if (!replacement) + continue; + + // Replace the AllocTensorOp. + rewriter.replaceOp(allocTensor.getDefiningOp(), replacement); + } + + // Advance to the next operation. + return WalkResult::advance(); + }); + + return failure(status.wasInterrupted()); +} + +/// Try to eliminate AllocTensorOps inside `op`. An AllocTensorOp can be +/// eliminated if it is eventually inserted into another tensor (and some other +/// conditions are met). +/// +/// E.g.: +/// %0 = linalg.alloc_tensor +/// %1 = linalg.fill(%cst, %0) {inplace = [true]} +/// %2 = tensor.insert_slice %1 into %t[10][20][1] +/// +/// AllocTensorOp elimination will try to fill %t inplace instead of filling a +/// new allocation %0 and inserting it into %t. This is done by replacing the +/// AllocTensorOp with: +/// +/// %0 = tensor.extract_slice %t[10][20][1] +/// +/// The analysis looks for matching ExtractSliceOp/InsertSliceOp pairs and lets +/// those bufferize inplace in the absence of other conflicts. +/// +/// Starting from an InsertSliceOp, an AllocTensorOp at the end of the insert +/// source's reverse use-def chain is eliminated if: +/// * On the reverse use-def chain path from the InsertSliceOp to the +/// AllocTensorOp, all ops were decided to bufferize inplace and the buffer +/// relation is "equivalent" (TODO: can be relaxed if needed). +/// * The reverse use-def chain has exactly one end, which is the AllocTensorOp. +LogicalResult +mlir::bufferization::insertSliceAnchoredAllocTensorEliminationStep( + RewriterBase &rewriter, Operation *op, AnalysisState &state) { + return eliminateAllocTensors( + rewriter, op, state, + /*anchorMatchFunc=*/ + [&](OpOperand &operand, SmallVector &neededValues) { + auto insertSliceOp = + dyn_cast(operand.getOwner()); + if (!insertSliceOp) + return false; + if (&operand != &insertSliceOp->getOpOperand(0) /*source*/) + return false; + + // Collect all values that are needed to construct the replacement op. 
+ neededValues.append(insertSliceOp.offsets().begin(), + insertSliceOp.offsets().end()); + neededValues.append(insertSliceOp.sizes().begin(), + insertSliceOp.sizes().end()); + neededValues.append(insertSliceOp.strides().begin(), + insertSliceOp.strides().end()); + neededValues.push_back(insertSliceOp.dest()); + + return true; + }, + /*rewriteFunc=*/ + [](OpBuilder &b, Location loc, OpOperand &operand) { + auto insertOp = cast(operand.getOwner()); + // Expand offsets, sizes and strides to the full rank to handle the + // rank-reducing case. + SmallVector mixedOffsets = insertOp.getMixedOffsets(); + SmallVector mixedSizes = insertOp.getMixedSizes(); + SmallVector mixedStrides = insertOp.getMixedStrides(); + OffsetSizeAndStrideOpInterface::expandToRank( + insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides, + [&](Value target, int64_t dim) -> OpFoldResult { + auto shapedType = target.getType().cast(); + if (shapedType.isDynamicDim(dim)) + return b.create(loc, target, dim).result(); + return b.getIndexAttr(shapedType.getDimSize(dim)); + }); + auto t = tensor::ExtractSliceOp::inferRankReducedResultType( + insertOp.getSourceType().getRank(), + insertOp.dest().getType().cast(), mixedOffsets, + mixedSizes, mixedStrides); + auto extractOp = b.create( + loc, t, insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides); + return extractOp.result(); + }); +} + +namespace { +struct AllocTensorElimination + : public AllocTensorEliminationBase { + AllocTensorElimination() = default; + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + } +}; +} // namespace + +void AllocTensorElimination::runOnOperation() { + Operation *op = getOperation(); + OneShotBufferizationOptions options; + OneShotAnalysisState state(op, options); + if (failed(analyzeOp(op, state))) { + signalPassFailure(); + return; + } + + IRRewriter rewriter(op->getContext()); + if (failed(bufferization::insertSliceAnchoredAllocTensorEliminationStep( + rewriter, op, state))) + signalPassFailure(); +} + +std::unique_ptr mlir::bufferization::createAllocTensorEliminationPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -237,6 +237,28 @@ }; } // namespace +namespace { +struct BufferizationBufferizePass + : public BufferizationBufferizeBase { + void runOnOperation() override { + BufferizationOptions options = getPartialBufferizationOptions(); + options.allowDialectInFilter(); + + if (failed(bufferizeOp(getOperation(), options))) + signalPassFailure(); + } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + } +}; +} // namespace + +std::unique_ptr mlir::bufferization::createBufferizationBufferizePass() { + return std::make_unique(); +} + std::unique_ptr mlir::bufferization::createOneShotBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRBufferizationTransforms + AllocTensorElimination.cpp Bufferize.cpp BufferDeallocation.cpp BufferOptimizations.cpp @@ -22,5 +23,6 @@ MLIRIR MLIRMemRef MLIRPass + 
MLIRTensor MLIRTransforms ) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp @@ -379,7 +379,7 @@ /// Return `true` if the given tensor value is a memory write. Most values are /// tensor writes, but ops that define a tensor SSA value without specifying its -/// contents (e.g., init_tensor) are not. +/// contents (e.g., alloc_tensor) are not. static bool isMemoryWrite(Value value, const AnalysisState &state) { auto opResult = value.dyn_cast(); if (!opResult) @@ -855,7 +855,7 @@ /// %1 = scf.if %c -> (tensor) { /// scf.yield %0 : tensor /// } else { -/// %t = linalg.init_tensor : tensor +/// %t = linalg.alloc_tensor : tensor /// scf.yield %t : tensor /// } /// ``` diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -32,7 +32,7 @@ // Example: `foo` fails bufferization because %0 is not equivalent to any bbArg. // ``` // func @foo() -> tensor { -// %0 = linalg.init_tensor [...] : tensor +// %0 = linalg.alloc_tensor [...] : tensor // return %0 : tensor // } // ``` diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp @@ -12,7 +12,6 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/Dialect.h" -#include "mlir/IR/Dominance.h" #include "mlir/IR/Operation.h" using namespace mlir; @@ -219,32 +218,6 @@ } }; -struct InitTensorOpInterface - : public BufferizableOpInterface::ExternalModel { - bool isMemoryWrite(Operation *op, OpResult opResult, - const AnalysisState &state) const { - // InitTensorOps allocate but do not write. - return false; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - BufferizationState &state) const { - auto initTensorOp = cast(op); - - // The InitTensorOp may have been eliminated. - if (initTensorOp->getUses().empty()) - return success(); - - FailureOr alloc = state.createAlloc(rewriter, initTensorOp->getLoc(), - initTensorOp.result()); - if (failed(alloc)) - return failure(); - replaceOpWithBufferizedValues(rewriter, op, *alloc); - return success(); - } -}; - /// Helper structure that iterates over all LinalgOps in `OpTys` and registers /// the `BufferizableOpInterface` with each of them. template @@ -256,230 +229,9 @@ }; } // namespace -/// Return true if all `neededValues` are in scope at the given -/// `insertionPoint`. -static bool -neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, - Operation *insertionPoint, - const SmallVector &neededValues) { - for (Value val : neededValues) { - if (auto bbArg = val.dyn_cast()) { - Block *owner = bbArg.getOwner(); - if (!owner->findAncestorOpInBlock(*insertionPoint)) - return false; - } else { - auto opResult = val.cast(); - if (!domInfo.dominates(opResult.getOwner(), insertionPoint)) - return false; - } - } - return true; -} - -/// Return true if the given `insertionPoint` dominates all uses of -/// `initTensorOp`. 
-static bool insertionPointDominatesUses(const DominanceInfo &domInfo, - Operation *insertionPoint, - Operation *initTensorOp) { - for (Operation *user : initTensorOp->getUsers()) - if (!domInfo.dominates(insertionPoint, user)) - return false; - return true; -} - -/// Find a valid insertion point for a replacement of `initTensorOp`, assuming -/// that the replacement may use any value from `neededValues`. -static Operation * -findValidInsertionPoint(Operation *initTensorOp, - const SmallVector &neededValues) { - DominanceInfo domInfo; - - // Gather all possible insertion points: the location of `initTensorOp` and - // right after the definition of each value in `neededValues`. - SmallVector insertionPointCandidates; - insertionPointCandidates.push_back(initTensorOp); - for (Value val : neededValues) { - // Note: The anchor op is using all of `neededValues`, so: - // * in case of a block argument: There must be at least one op in the block - // (the anchor op or one of its parents). - // * in case of an OpResult: There must be at least one op right after the - // defining op (the anchor op or one of its - // parents). - if (auto bbArg = val.dyn_cast()) { - insertionPointCandidates.push_back( - &bbArg.getOwner()->getOperations().front()); - } else { - insertionPointCandidates.push_back(val.getDefiningOp()->getNextNode()); - } - } - - // Select first matching insertion point. - for (Operation *insertionPoint : insertionPointCandidates) { - // Check if all needed values are in scope. - if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, - neededValues)) - continue; - // Check if the insertion point is before all uses. - if (!insertionPointDominatesUses(domInfo, insertionPoint, initTensorOp)) - continue; - return insertionPoint; - } - - // No suitable insertion point was found. - return nullptr; -} - -/// Try to eliminate InitTensorOps inside `op`. An InitTensorOp is replaced -/// with the the result of `rewriteFunc` if it is anchored on a matching -/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def -/// chain, starting from the OpOperand and always following the aliasing -/// OpOperand, that eventually ends at a single InitTensorOp. -LogicalResult mlir::linalg::eliminateInitTensors(RewriterBase &rewriter, - Operation *op, - AnalysisState &state, - AnchorMatchFn anchorMatchFunc, - RewriteFn rewriteFunc) { - OpBuilder::InsertionGuard g(rewriter); - - WalkResult status = op->walk([&](Operation *op) { - for (OpOperand &operand : op->getOpOperands()) { - // Skip operands that do not bufferize inplace. - if (!state.isInPlace(operand)) - continue; - // All values that are needed to create the replacement op. - SmallVector neededValues; - // Is this a matching OpOperand? - if (!anchorMatchFunc(operand, neededValues)) - continue; - SetVector maybeInitTensor = - state.findValueInReverseUseDefChain(operand.get(), [&](Value val) { - // Continue traversal until this function returns true. - OpResult opResult = val.dyn_cast(); - if (!opResult) - return true; - SmallVector opOperands = - state.getAliasingOpOperand(opResult); - if (!llvm::all_of(opOperands, [&](OpOperand *operand) { - return state.isInPlace(*operand); - })) - return true; - // Only equivalent tensors are supported at the moment. 
- // TODO: Support cases such as extract_slice(init_tensor) - return !llvm::all_of(opOperands, [&](OpOperand *operand) { - return state.areEquivalentBufferizedValues(operand->get(), - opResult); - }); - }); - - // Replace only if the reverse use-def chain ends at exactly one - // InitTensorOp. - if (maybeInitTensor.size() != 1 || - !maybeInitTensor.front().getDefiningOp()) - return WalkResult::skip(); - Value initTensor = maybeInitTensor.front(); - - // Find a suitable insertion point. - Operation *insertionPoint = - findValidInsertionPoint(initTensor.getDefiningOp(), neededValues); - if (!insertionPoint) - continue; - - // Create a replacement for the InitTensorOp. - rewriter.setInsertionPoint(insertionPoint); - Value replacement = rewriteFunc(rewriter, initTensor.getLoc(), operand); - if (!replacement) - continue; - - // Replace the InitTensorOp. - rewriter.replaceOp(initTensor.getDefiningOp(), replacement); - } - - // Advance to the next operation. - return WalkResult::advance(); - }); - - return failure(status.wasInterrupted()); -} - -/// Try to eliminate InitTensorOps inside `op`. An InitTensorOp can be -/// eliminated if it is eventually inserted into another tensor (and some other -/// conditions are met). -/// -/// E.g.: -/// %0 = linalg.init_tensor -/// %1 = linalg.fill(%cst, %0) {inplace = [true]} -/// %2 = tensor.insert_slice %1 into %t[10][20][1] -/// -/// InitTensorOp elimination will try to fill %t inplace instead of filling a -/// new allocation %0 and inserting it into %t. This is done by replacing the -/// InitTensorOp with: -/// -/// %0 = tensor.extract_slice %t[10][20][1] -/// -/// The analysis looks for matching ExtractSliceOp/InsertSliceOp pairs and lets -/// those bufferize inplace in the absence of other conflicts. -/// -/// Starting from an InsertSliceOp, an InitTensorOp at the end of the insert -/// source's reverse use-def chain is eliminated if: -/// * On the reverse use-def chain path from the InsertSliceOp to the -/// InitTensorOp, all ops were decided to bufferize inplace and the buffer -/// relation is "equivalent" (TODO: can be relaxed if needed). -/// * The reverse use-def chain has exactly one end, which is the InitTensorOp. -LogicalResult mlir::linalg::insertSliceAnchoredInitTensorEliminationStep( - RewriterBase &rewriter, Operation *op, AnalysisState &state) { - return eliminateInitTensors( - rewriter, op, state, - /*anchorMatchFunc=*/ - [&](OpOperand &operand, SmallVector &neededValues) { - auto insertSliceOp = - dyn_cast(operand.getOwner()); - if (!insertSliceOp) - return false; - if (&operand != &insertSliceOp->getOpOperand(0) /*source*/) - return false; - - // Collect all values that are needed to construct the replacement op. - neededValues.append(insertSliceOp.offsets().begin(), - insertSliceOp.offsets().end()); - neededValues.append(insertSliceOp.sizes().begin(), - insertSliceOp.sizes().end()); - neededValues.append(insertSliceOp.strides().begin(), - insertSliceOp.strides().end()); - neededValues.push_back(insertSliceOp.dest()); - - return true; - }, - /*rewriteFunc=*/ - [](OpBuilder &b, Location loc, OpOperand &operand) { - auto insertOp = cast(operand.getOwner()); - // Expand offsets, sizes and strides to the full rank to handle the - // rank-reducing case. 
- SmallVector mixedOffsets = insertOp.getMixedOffsets(); - SmallVector mixedSizes = insertOp.getMixedSizes(); - SmallVector mixedStrides = insertOp.getMixedStrides(); - OffsetSizeAndStrideOpInterface::expandToRank( - insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides, - [&](Value target, int64_t dim) -> OpFoldResult { - auto shapedType = target.getType().cast(); - if (shapedType.isDynamicDim(dim)) - return b.create(loc, target, dim).result(); - return b.getIndexAttr(shapedType.getDimSize(dim)); - }); - auto t = tensor::ExtractSliceOp::inferRankReducedResultType( - insertOp.getSourceType().getRank(), - insertOp.dest().getType().cast(), mixedOffsets, - mixedSizes, mixedStrides); - auto extractOp = b.create( - loc, t, insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides); - return extractOp.result(); - }); -} - void mlir::linalg::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { - linalg::InitTensorOp::attachInterface(*ctx); - // Register all Linalg structured ops. `LinalgOp` is an interface and it is // not possible to attach an external interface to an existing interface. // Therefore, attach the `BufferizableOpInterface` to all ops one-by-one. diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -13,7 +13,7 @@ Generalization.cpp Hoisting.cpp HoistPadding.cpp - InitTensorElimination.cpp + InitTensorToAllocTensor.cpp InlineScalarOperands.cpp Interchange.cpp Loops.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp deleted file mode 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===- ComprehensiveBufferize.cpp - Single pass bufferization -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PassDetail.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" -#include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; -using namespace mlir::bufferization; -using namespace mlir::linalg; - -namespace { -struct LinalgInitTensorElimination - : public LinalgInitTensorEliminationBase { - LinalgInitTensorElimination() = default; - - void runOnOperation() override; - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } -}; -} // namespace - -void LinalgInitTensorElimination::runOnOperation() { - Operation *op = getOperation(); - OneShotBufferizationOptions options; - OneShotAnalysisState state(op, options); - if (failed(analyzeOp(op, state))) { - signalPassFailure(); - return; - } - - IRRewriter rewriter(op->getContext()); - if (failed(insertSliceAnchoredInitTensorEliminationStep(rewriter, op, state))) - signalPassFailure(); -} - -std::unique_ptr mlir::createLinalgInitTensorEliminationPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp @@ -0,0 +1,55 @@ +//===- InitTensorToAllocTensor.cpp - Lower init_tensor to alloc_tensor ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+using namespace mlir;
+using namespace mlir::bufferization;
+using namespace mlir::linalg;
+
+namespace {
+struct InitTensorLoweringPattern : public OpRewritePattern<InitTensorOp> {
+  using OpRewritePattern<InitTensorOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(InitTensorOp op,
+                                PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<bufferization::AllocTensorOp>(
+        op, op.getMixedSizes(), op.getType().getElementType());
+    return success();
+  }
+};
+
+struct LinalgInitTensorToAllocTensor
+    : public LinalgInitTensorToAllocTensorBase<LinalgInitTensorToAllocTensor> {
+  LinalgInitTensorToAllocTensor() = default;
+
+  void runOnOperation() override;
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry
+        .insert<bufferization::BufferizationDialect>();
+  }
+};
+} // namespace
+
+void LinalgInitTensorToAllocTensor::runOnOperation() {
+  Operation *op = getOperation();
+  RewritePatternSet patterns(op->getContext());
+  patterns.insert<InitTensorLoweringPattern>(op->getContext());
+  if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
+    signalPassFailure();
+}
+
+std::unique_ptr<Pass> mlir::createLinalgInitTensorToAllocTensorPass() {
+  return std::make_unique<LinalgInitTensorToAllocTensor>();
+}
diff --git a/mlir/python/mlir/dialects/BufferizationOps.td b/mlir/python/mlir/dialects/BufferizationOps.td
new file mode 100644
--- /dev/null
+++ b/mlir/python/mlir/dialects/BufferizationOps.td
@@ -0,0 +1,15 @@
+//===-- BufferizationOps.td - Entry point for BufferizationOps bindings ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PYTHON_BINDINGS_BUFFERIZATION_OPS
+#define PYTHON_BINDINGS_BUFFERIZATION_OPS
+
+include "mlir/Bindings/Python/Attributes.td"
+include "mlir/Dialect/Bufferization/IR/BufferizationOps.td"
+
+#endif
diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py
new file mode 100644
--- /dev/null
+++ b/mlir/python/mlir/dialects/_bufferization_ops_ext.py
@@ -0,0 +1,51 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+try:
+  from typing import Sequence, Union
+  from ..ir import *
+  from ._ods_common import get_default_loc_context as _get_default_loc_context
+
+  from typing import Any, List, Union
+except ImportError as e:
+  raise RuntimeError("Error loading imports from extension module") from e
+
+
+class AllocTensorOp:
+  """Extends the bufferization.alloc_tensor op."""
+
+  def __init__(self,
+               sizes: Union[Sequence[int], Sequence[Value]],
+               element_type: Type,
+               *,
+               loc=None,
+               ip=None):
+    """Constructs an `alloc_tensor` with either static or dynamic sizes."""
+    context = _get_default_loc_context(loc)
+    operands = []
+    attributes = {}
+    # TODO: Refactor the AllocTensorOp to take an element type attribute and
+    # then use normal result type inference, unifying the Python and C++ side
+    # with a standard mechanism (versus stashing that in builders).
+ if sizes and isinstance(sizes[0], Value): + # Dynamic sizes. + operands.extend(sizes) + static_size_ints = [-1] * len(sizes) + result_type = RankedTensorType.get(static_size_ints, element_type) + else: + # Static sizes. + result_type = RankedTensorType.get(sizes, element_type) + static_size_ints = sizes + + i64_type = IntegerType.get_signless(64) + attributes["static_sizes"] = ArrayAttr.get( + [IntegerAttr.get(i64_type, s) for s in static_size_ints], + context=context) + op = self.build_generic( + results=[result_type], + operands=operands, + attributes=attributes, + loc=loc, + ip=ip) + OpView.__init__(self, op) diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir rename from mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir rename to mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-eliminate-init-tensors -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s +// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s // CHECK: func @buffer_forwarding_conflict( // CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref @@ -14,11 +14,11 @@ // This allocs the whole dim to allow for a full clone of t. // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]]) - // init_tensor itself does not alloc but forwards to the **second** - // insert_slice. InitTensorOp replaces the init_tensor with an out-of-place + // alloc_tensor itself does not alloc but forwards to the **second** + // insert_slice. AllocTensorOp replaces the alloc_tensor with an out-of-place // extract_slice. // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - %a = linalg.init_tensor[%sz] : tensor + %a = bufferization.alloc_tensor[%sz] : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor @@ -47,10 +47,10 @@ { %f0 = arith.constant 0.0: f32 - // init_tensor itself does not alloc but forwards to the insert_slice. - // InitTensorOp replaces the init_tensor with an inplace extract_slice. + // alloc_tensor itself does not alloc but forwards to the insert_slice. + // InitTensorOp replaces the alloc_tensor with an inplace extract_slice. 
// CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] - %a = linalg.init_tensor[%sz] : tensor + %a = bufferization.alloc_tensor[%sz] : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor @@ -71,7 +71,7 @@ %c5 = arith.constant 5 : index // CHECK-NOT: memref.alloc - %blank = linalg.init_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor [5] : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { @@ -102,7 +102,7 @@ // CHECK-NOT: memref.alloc // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1] - %blank = linalg.init_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor [5] : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir @@ -16,7 +16,7 @@ // CHECK-NOT: dealloc // CHECK: scf.yield %[[casted]] %sz = "test.some_op"() : () -> (index) - %0 = linalg.init_tensor[%sz] : tensor + %0 = bufferization.alloc_tensor[%sz] : tensor scf.yield %0 : tensor } else { // CHECK: } else { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -138,11 +138,11 @@ %idx = arith.constant 0 : index %cst = arith.constant 5.0 : f32 - // One alloc for the init_tensor, another one because the transfer_write + // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. 
// CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> - %t1 = linalg.init_tensor [10] : tensor<10xf32> + %t1 = bufferization.alloc_tensor [10] : tensor<10xf32> // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -44,7 +44,7 @@ // CHECK-LABEL: func @func_without_tensor_args func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -97,7 +97,7 @@ // CHECK-LABEL: func @copy_deallocated( func.func @copy_deallocated() -> tensor<10xf32> { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]] // CHECK: memref.dealloc %[[alloc]] // CHECK: return %[[alloc_tensor]] @@ -111,7 +111,7 @@ func.func @select_different_tensors(%t: tensor, %sz: index, %c: i1) -> tensor { // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : memref // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref - %0 = linalg.init_tensor [%sz] : tensor + %0 = bufferization.alloc_tensor [%sz] : tensor // A cast must be inserted because %t and %0 have different memref types. 
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref to memref diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -17,7 +17,7 @@ // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: return %[[alloc]] func.func @create_tensor() -> tensor<10xf32> { - %0 = linalg.init_tensor [10] : tensor<10xf32> + %0 = bufferization.alloc_tensor [10] : tensor<10xf32> return %0 : tensor<10xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -682,7 +682,7 @@ %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -720,7 +720,7 @@ %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -1246,19 +1246,19 @@ // ----- -// CHECK-LABEL: func @write_to_same_init_tensor_in_place( -func.func @write_to_same_init_tensor_in_place( +// CHECK-LABEL: func @write_to_same_alloc_tensor_in_place( +func.func @write_to_same_alloc_tensor_in_place( %A : tensor {linalg.inplaceable = true}, %lb : index, %ub : index, %step : index, %sz: index, %sz2: index) -> (tensor) { - %B = linalg.init_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor [%sz2] : tensor // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 - // %B is written multiple times inside a loop, but it is an init_tensor. + // %B is written multiple times inside a loop, but it is an alloc_tensor. // CHECK: tensor.insert // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]} %B2 = tensor.insert %i3 into %B[%i] : tensor @@ -1274,13 +1274,13 @@ // ----- -// CHECK-LABEL: func @write_to_same_init_tensor_out_of_place( -func.func @write_to_same_init_tensor_out_of_place( +// CHECK-LABEL: func @write_to_same_alloc_tensor_out_of_place( +func.func @write_to_same_alloc_tensor_out_of_place( %A : tensor {linalg.inplaceable = true}, %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32) -> (tensor) { - %B = linalg.init_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor [%sz2] : tensor %C = tensor.insert %f into %B[%lb] : tensor // CHECK: scf.for {{.*}} { @@ -1288,8 +1288,8 @@ %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // %C is written multiple times inside a loop. Even though %C aliases with - // an init_tensor, out-of-bounds bufferization is necessary because there is - // another alias (%C) outside of the loop. 
+ // an alloc_tensor, out-of-bounds bufferization is necessary because there + // is another alias (%C) outside of the loop. // CHECK: tensor.insert // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} %B2 = tensor.insert %i3 into %C[%i] : tensor diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -60,7 +60,7 @@ scf.yield %t1 : tensor } else { // This buffer aliases. - %t2 = linalg.init_tensor [%idx] : tensor + %t2 = bufferization.alloc_tensor [%idx] : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %t2 : tensor } @@ -221,7 +221,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 - %t = linalg.init_tensor [10, 20] : tensor<10x20xf32> + %t = bufferization.alloc_tensor [10, 20] : tensor<10x20xf32> %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %r : tensor<10x20xf32> @@ -274,7 +274,7 @@ // ----- func.func @foo(%t : tensor<5xf32>) -> (tensor<5xf32>) { - %0 = linalg.init_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor [5] : tensor<5xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %0 : tensor<5xf32> } @@ -291,7 +291,7 @@ func.func @destination_passing_style_dominance_test_1(%cst : f32, %idx : index, %idx2 : index) -> f32 { %0 = scf.execute_region -> tensor { - %1 = linalg.init_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor [%idx] : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %1 : tensor } @@ -304,7 +304,7 @@ func.func @destination_passing_style_dominance_test_2(%cst : f32, %idx : index, %idx2 : index) -> f32 { - %1 = linalg.init_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor [%idx] : tensor %0 = scf.execute_region -> tensor { // This YieldOp is in destination-passing style, thus no error. 
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -70,7 +70,7 @@ // CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> { func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>) { - %t = linalg.init_tensor [20, 10] : tensor<20x10xf32> + %t = bufferization.alloc_tensor [20, 10] : tensor<20x10xf32> %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1] : tensor<20x10xf32> to tensor<2x?xf32> return %0 : tensor<2x?xf32> @@ -120,7 +120,7 @@ // CHECK-LABEL: func @func_without_tensor_args func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -456,9 +456,9 @@ // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref to memref - %A = linalg.init_tensor [64] : tensor<64xf32> - %B = linalg.init_tensor [64] : tensor<64xf32> - %C = linalg.init_tensor [] : tensor + %A = bufferization.alloc_tensor [64] : tensor<64xf32> + %B = bufferization.alloc_tensor [64] : tensor<64xf32> + %C = bufferization.alloc_tensor [] : tensor // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>) diff --git a/mlir/test/Dialect/Bufferization/canonicalize.mlir b/mlir/test/Dialect/Bufferization/canonicalize.mlir --- a/mlir/test/Dialect/Bufferization/canonicalize.mlir +++ b/mlir/test/Dialect/Bufferization/canonicalize.mlir @@ -243,3 +243,16 @@ // CHECK: %[[RES:.*]] = tensor.extract %[[TENSOR]][%[[IDX0]], %[[IDX1]]] // CHECK-NOT: memref.load // CHECK: return %[[RES]] : f32 + + +// ----- + +func.func @alloc_tensor_canonicalize() -> (tensor<4x5x?xf32>) { + %c6 = arith.constant 6 : index + %0 = bufferization.alloc_tensor [4, 5, %c6] : tensor<4x5x?xf32> + return %0 : tensor<4x5x?xf32> +} +// CHECK: func @alloc_tensor_canonicalize +// CHECK: %[[T0:.+]] = bufferization.alloc_tensor [4, 5, 6] : tensor<4x5x6xf32> +// CHECK: %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32> +// CHECK: return %[[T1]] diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/Bufferization/invalid.mlir @@ -0,0 +1,26 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +func.func @alloc_tensor_err(%arg0 : index, %arg1 : index) +{ + // expected-error @+1 {{specified type 'tensor<4x?x?x5xf32>' does not match the inferred type 'tensor<4x5x?x?xf32>'}} + %1 = bufferization.alloc_tensor [4, 5, %arg0, %arg1] : tensor<4x?x?x5xf32> + return +} + +// ----- + +func.func @alloc_tensor_err(%arg0 : index) +{ + // expected-error @+1 {{expected 4 sizes values}} + %1 = bufferization.alloc_tensor [4, 5, %arg0] : tensor<4x?x?x5xf32> + return +} + +// ----- + +func.func @alloc_tensor_err(%arg0 : index) +{ + // expected-error @+1 {{expected 2 dynamic sizes values}} + %1 = "bufferization.alloc_tensor"(%arg0) {static_sizes = [4, -1, -1, 
5]} : (index) -> tensor<4x?x?x5xf32> + return +} diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only bufferize-function-boundaries" -split-input-file | FileCheck %s -/// All combinations of matmul(fill(extract(init_tensor)), fill(extract(%init_tensor)), %arg2) +/// All combinations of matmul(fill(extract(alloc_tensor)), fill(extract(%alloc_tensor)), %arg2) /// These should all be inplaceable except the first op. // ----- @@ -15,7 +15,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -42,7 +42,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -69,7 +69,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -96,7 +96,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -123,7 +123,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -150,7 +150,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -177,7 +177,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: 
{__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -204,7 +204,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -231,7 +231,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -258,7 +258,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -285,7 +285,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -312,7 +312,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -339,7 +339,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -366,7 +366,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -392,7 +392,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -419,7 +419,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = 
linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -446,7 +446,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -473,7 +473,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -500,7 +500,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -527,7 +527,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -554,7 +554,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -581,7 +581,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -608,7 +608,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -635,7 +635,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> diff --git 
a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-eliminate-init-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s //===----------------------------------------------------------------------===// // InitTensorOp elimination @@ -10,7 +10,7 @@ // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"] // Instead of allocating, share buffer with some inplace bufferization? - %0 = linalg.init_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor [%arg1] : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] @@ -37,7 +37,7 @@ // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"] // Instead of allocating, share buffer with some inplace bufferization? - %0 = linalg.init_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor [%arg1] : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -339,7 +339,7 @@ // ----- //===----------------------------------------------------------------------===// -// InitTensorOp elimination would produce SSA violations for the example below. +// AllocTensorOp elimination would produce SSA violations for the example below. 
//===----------------------------------------------------------------------===// func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x18x32xf32>) @@ -347,9 +347,9 @@ %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c8 = arith.constant 8 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> %3 = scf.for %arg3 = %c0 to %c32 step %c8 iter_args(%arg4 = %1) -> (tensor) { %4 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg3) %5 = tensor.insert_slice %2 into %arg4[%4,0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : @@ -361,8 +361,8 @@ // ----- -// CHECK-LABEL: func @do_not_copy_init_tensors( -func.func @do_not_copy_init_tensors(%f1: f32, %f2: f32, %idx: index) +// CHECK-LABEL: func @do_not_copy_alloc_tensors( +func.func @do_not_copy_alloc_tensors(%f1: f32, %f2: f32, %idx: index) -> (tensor<5xf32>, tensor<5xf32>) { // CHECK: memref.alloc @@ -370,7 +370,7 @@ // CHECK-NOT: copy // CHECK: memref.store // CHECK: memref.store - %0 = linalg.init_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor [5] : tensor<5xf32> %1 = tensor.insert %f1 into %0[%idx] : tensor<5xf32> %2 = tensor.insert %f2 into %0[%idx] : tensor<5xf32> return %1, %2 : tensor<5xf32>, tensor<5xf32> diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -583,7 +583,7 @@ { // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - %B = linalg.init_tensor [%sz] : tensor + %B = bufferization.alloc_tensor [%sz] : tensor %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // The tensor.insert is in-place because the %B is defined inside the loop. 
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -220,7 +220,7 @@ // CHECK: return %[[r]] func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 { %r = scf.execute_region -> (tensor) { - %t2 = linalg.init_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor [%i] : tensor scf.yield %t2 : tensor } %f = tensor.extract %r[%j] : tensor @@ -261,7 +261,7 @@ // CHECK-SAME: %[[t:.*]]: memref, %lb : index, %ub : index, %step : index) -> tensor { %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor { - %t2 = linalg.init_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor [%i] : tensor scf.yield %t2 : tensor } diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -179,9 +179,9 @@ %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor) { %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7) %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -16,7 +16,7 @@ %c0 = arith.constant 0 : index %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor %1 = affine.apply #map0(%c0, %c64)[%c2] - %2 = linalg.init_tensor [%1, 2] : tensor + %2 = bufferization.alloc_tensor [%1, 2] : tensor %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -33,7 +33,7 @@ // call @printMemrefF32(%B) : (tensor<*xf32>) -> () %4 = affine.apply #map0(%c0, %c64)[%c2] - %5 = linalg.init_tensor [%4, 2] : tensor + %5 = bufferization.alloc_tensor [%4, 2] : tensor %6 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %5) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -80,9 +80,9 @@ %v1 = arith.constant 1.0 : f32 %v2 = arith.constant 2.0 : f32 - %A = linalg.init_tensor [64] : tensor<64xf32> - %B = linalg.init_tensor [64] : tensor<64xf32> - %C = linalg.init_tensor [] : tensor + %A = bufferization.alloc_tensor [64] : tensor<64xf32> + %B = bufferization.alloc_tensor [64] : tensor<64xf32> + %C = bufferization.alloc_tensor [] : tensor %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \ -// RUN: -linalg-bufferize -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -linalg-init-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \ +// RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \ // RUN: -finalizing-bufferize -buffer-deallocation \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -8692,6 +8692,7 @@ deps = [ ":AllocationOpInterfaceTdFiles", ":CopyOpInterfaceTdFiles", + ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", ], @@ -8755,6 +8756,7 @@ ], includes = ["include"], deps = [ + ":Affine", ":AllocationOpInterface", ":ArithmeticDialect", ":BufferizableOpInterfaceIncGen", @@ -8763,6 +8765,7 @@ ":CopyOpInterface", ":FuncDialect", ":IR", + ":InferTypeOpInterface", ":MemRefDialect", ":Support", ":TensorDialect", @@ -8809,6 +8812,7 @@ ":LoopLikeInterface", ":MemRefDialect", ":Pass", + ":TensorDialect", ":Transforms", "//llvm:Support", ],
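
Usage note: the Python builder shown at the top of this section can be exercised roughly as below. This is a minimal sketch, assuming the builder is exposed as `bufferization.AllocTensorOp(sizes, element_type)` in a generated `mlir.dialects.bufferization` module; the module path, class name, and positional signature are inferred from the hunk above and not verified against upstream. Static sizes are plain Python ints; dynamic sizes would instead be `index`-typed `Value`s.

    # Sketch only; `mlir.dialects.bufferization.AllocTensorOp` and its
    # (sizes, element_type) signature are assumptions inferred from the
    # builder hunk at the start of this section.
    from mlir.ir import Context, Location, Module, InsertionPoint, F32Type
    from mlir.dialects import bufferization

    with Context(), Location.unknown():
        module = Module.create()
        with InsertionPoint(module.body):
            f32 = F32Type.get()
            # All sizes static: no operands, static_sizes = [4, 8], and the
            # result type is built as tensor<4x8xf32>. Expected to print
            # roughly as: bufferization.alloc_tensor [4, 8] : tensor<4x8xf32>
            bufferization.AllocTensorOp([4, 8], f32)
            # With dynamic sizes, `sizes` would be a list of index-typed
            # Values; the builder then records -1 (dynamic) entries in
            # static_sizes and produces a tensor<?x?xf32> result.
        print(module)

The static/dynamic split in the sketch mirrors the op definition: dynamic extents are SSA operands, while `static_sizes` is an I64ArrayAttr in which -1 marks the dynamic positions.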