diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -31,20 +31,54 @@ let description = [{ `bufferization.alloc_tensor` materializes an uninitialized tensor with a given shape (dynamic or static). It always bufferizes to a new buffer - allocation of the given shape. The optional `copy` operand specifies the - contents of the tensors. If no `copy` operand is specified, reading from the - result of an `alloc_tensor` op yields an undefined value. + allocation of the given shape. It is a helper op for bufferization. The + operation is provided as an anchor that marks the beginning of a new tensor + SSA use-def chain. It can be used to control in-place bufferization + decisions during One-Shot Bufferize: The bufferized result of a + `bufferization.alloc_tensor` does not alias with any other buffer, so it can + be used to resolve read-after-write conflicts that would have been + introduced by the in-place bufferization of another op. + + The region of the operation specifies how the contents of the tensor should + be initialized. The initializer region has a single block argument that + represents the allocated tensor. It can be used as a destination for DPS + operations. - If `copy` is specified, no dynamic sizes should be passed, since they are - the same as the dynamic sizes of the `copy` operand. + Example: + + ```mlir + %0 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + bufferization.yield %t : tensor + } : tensor + ``` + + In the above example, the result is initialized with the contents of %t. + There is a shorter syntax for operations whose region has the terminator as + the only operation: + + ```mlir + %0 = bufferization.alloc_tensor(%sz) copy(%t) : tensor + ``` + + Initialization is optional: %arg0 could be yielded instead of %t. In that + case the result has no specified contents. The short syntax in that case is: + + ```mlir + %0 = bufferization.alloc_tensor(%sz) : tensor + ``` + + Note: This operation acts as a hoisting barrier. The first example is + similar to writing the following IR. + + ```mlir + %0 = bufferization.alloc_tensor(%sz) : tensor + %1 = tensor.insert_slice %t into %0[0] [%sz] [1] + ``` - `alloc_tensor` is a helper op for bufferization. The operation is provided - as an anchor that marks the beginning of a new tensor SSA use-def chain. It - can be used to control in-place bufferization decisions during One-Shot - Bufferize: The bufferized result of a `bufferization.alloc_tensor` does not - alias with any other buffer, so it can be used to resolve read-after-write - conflicts that would have been introduced by the in-place bufferization of - another op. + However, the `tensor.insert_slice` is subject to canonicalizations and + foldings, such that there is no guarantee the allocated tensor will be + initialized with %t. The optional `memory_space` attribute specifies the memory space when bufferizing this op. The memory space is inferred from `copy` if specified.
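For C++ clients, here is a minimal sketch of how the revised op could be constructed with the new builder signature (dynamic sizes, size hint, memory space, copy). The helper name `buildCopyAllocTensor` is hypothetical; the builder arguments mirror the call site updated in TensorCopyInsertion.cpp further down in this patch.

```cpp
// Sketch only, assuming the new AllocTensorOp builder introduced by this patch.
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"

using namespace mlir;

// Builds `bufferization.alloc_tensor(...) copy(%src)`: the builder creates
// the init region and terminates it with `bufferization.yield %src`.
static Value buildCopyAllocTensor(OpBuilder &b, Location loc, Value src,
                                  ValueRange dynamicSizes) {
  auto type = src.getType().cast<RankedTensorType>();
  // `dynamicSizes` must provide one index value per dynamic dimension of
  // `type`; the `copy` value no longer implies them.
  auto allocOp = b.create<bufferization::AllocTensorOp>(
      loc, type, dynamicSizes, /*sizeHint=*/Value(),
      /*memorySpace=*/Attribute(), /*copy=*/src);
  return allocOp.getResult();
}
```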
@@ -73,17 +107,16 @@ ``` ```mlir - %c = bufferization.alloc_tensor(%d1, %d2) size_hint = %noe + %c = bufferization.alloc_tensor(%d1, %d2) size_hint(%noe) : tensor ``` }]; let arguments = (ins Variadic:$dynamic_sizes, - Optional:$copy, Optional:$size_hint, OptionalAttr:$memory_space); - let results = (outs AnyTensor:$result); + let regions = (region SizedRegion<1>:$region); let extraClassDeclaration = [{ LogicalResult bufferize(RewriterBase &rewriter, @@ -94,19 +127,14 @@ bool bufferizesToAllocation(OpResult opResult) { return true; } - bool bufferizesToMemoryRead(OpOperand &opOperand, - const AnalysisState &state); - - bool bufferizesToMemoryWrite(OpOperand &opOperand, - const AnalysisState &state); - - AliasingOpResultList getAliasingOpResults( - OpOperand &opOperand, const AnalysisState &state); - FailureOr getBufferType( Value value, const BufferizationOptions &options, const DenseMap &fixedTypes); + bool isWritable(Value value, const AnalysisState &state) const { + return true; + } + RankedTensorType getType() { return getResult().getType().cast(); } @@ -120,7 +148,6 @@ // the tensor at dimension `idx`. Asserts that the shape is // dynamic at that `idx`. unsigned getIndexOfDynamicSize(unsigned idx) { - assert(!getCopy() && "no dim sizes specified when copying a tensor"); assert(isDynamicDim(idx) && "expected dynamic size"); ArrayRef shape = getType().getShape(); return std::count_if( @@ -132,29 +159,35 @@ // `idx`. Asserts that the shape is dynamic at that `idx. Value getDynamicSize(OpBuilder &b, unsigned idx); - // Assert that the size of the result tensor is static at `idx` - // and return the shape. - int64_t getStaticSize(unsigned idx) { - assert(!isDynamicDim(idx) && "expected static size"); - return getType().getShape()[idx]; - } + /// Return the terminator of the region. + YieldOp getTerminator(); + + /// Return the block argument of the region. + BlockArgument getBlockArgument() { return getRegion().getArgument(0); } + + /// Return true if the terminator is yielding a value that is different from + /// the region's block argument. I.e., the allocated tensor getting + /// initialized. + bool hasInitializer(); + + /// Return true if this op has an initializer and the terminator is the only + /// op in the region. + bool hasCopyInitializer(); }]; let builders = [ - // Build an op without `copy` or `memory_space` or `size_hint`. - OpBuilder<(ins "RankedTensorType":$type, "ValueRange":$dynamicSizes)>, - - // Build an op without `memory_space` or `size_hint`. + // Build an op without `size_hint`. OpBuilder<(ins "RankedTensorType":$type, "ValueRange":$dynamicSizes, - "Value":$copy)>, + CArg<"Attribute", "{}">:$memory_space)>, - // Build an op without `size_hint`. - OpBuilder<(ins "TensorType":$type, "ValueRange":$dynamicSizes, - "Value":$copy, "IntegerAttr":$memory_space)>, + OpBuilder<(ins "RankedTensorType":$type, "ValueRange":$dynamicSizes, + "Value":$size_hint, CArg<"Attribute", "{}">:$memory_space, + CArg<"Value", "{}">:$copy)>, ]; - let hasCanonicalizer = 1; + let skipDefaultBuilders = 1; let hasCustomAssemblyFormat = 1; + let hasCanonicalizer = 1; let hasVerifier = 1; } @@ -433,4 +466,39 @@ let hasCanonicalizer = 1; } +def Bufferization_YieldOp : Bufferization_Op<"yield", + [BufferizableOpInterface, Terminator]> { + let summary = "yield operation"; + let description = [{ + This is the terminator operation for `bufferization.alloc_tensor`. It yields + a single tensor value. 
+ }]; + + let arguments = (ins AnyTensor:$tensor); + let assemblyFormat = "$tensor attr-dict `:` type($tensor)"; + + let extraClassDeclaration = [{ + AliasingOpResultList getAliasingOpResults( + OpOperand &opOperand, const AnalysisState &state) const { + return {}; + } + + bool bufferizesToMemoryRead(OpOperand &opOperand, + const AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(OpOperand &opOperand, + const AnalysisState &state) const { + return false; + } + + LogicalResult bufferize(RewriterBase &rewriter, + const BufferizationOptions &options) { + // Bufferized as part of bufferization.alloc_tensor. + return success(); + } + }]; +} + #endif // BUFFERIZATION_OPS diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -132,34 +132,34 @@ } RankedTensorType tensorType = tensor.getType().cast(); SmallVector dynamicSizes; - if (!copy) { - // Compute the dynamic part of the shape. - // First try to query the shape via ReifyRankedShapedTypeOpInterface. - bool reifiedShapes = false; - if (shapedValue.getType().isa() && - shapedValue.isa()) { - if (auto rankedOp = dyn_cast_or_null( - shapedValue.getDefiningOp())) { - ReifiedRankedShapedTypeDims resultDims; - if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) { - reifiedShapes = true; - auto &shape = - resultDims[shapedValue.cast().getResultNumber()]; - for (const auto &dim : enumerate(tensorType.getShape())) - if (ShapedType::isDynamic(dim.value())) - dynamicSizes.push_back(shape[dim.index()]); - } + // Compute the dynamic part of the shape. + // First try to query the shape via ReifyRankedShapedTypeOpInterface. + bool reifiedShapes = false; + if (shapedValue.getType().isa() && + shapedValue.isa()) { + if (auto rankedOp = dyn_cast_or_null( + shapedValue.getDefiningOp())) { + ReifiedRankedShapedTypeDims resultDims; + if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) { + reifiedShapes = true; + auto &shape = + resultDims[shapedValue.cast().getResultNumber()]; + for (const auto &dim : enumerate(tensorType.getShape())) + if (ShapedType::isDynamic(dim.value())) + dynamicSizes.push_back(shape[dim.index()]); } } - - // If the shape could not be reified, create DimOps. - if (!reifiedShapes) - populateDynamicDimSizes(b, loc, tensor, dynamicSizes); } + // If the shape could not be reified, create DimOps. + if (!reifiedShapes) + populateDynamicDimSizes(b, loc, tensor, dynamicSizes); + // Create AllocTensorOp. - auto allocTensorOp = b.create(loc, tensorType, dynamicSizes, - copy ? tensor : Value()); + auto allocTensorOp = + b.create(loc, tensorType, dynamicSizes, + /*sizeHint=*/Value(), /*memorySpace=*/Attribute(), + /*copy=*/copy ? tensor : Value()); allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName, b.getBoolArrayAttr({escape})); @@ -261,7 +261,9 @@ opResult.getUses(), [](OpOperand &use) { return &use; })); for (OpOperand *use : uses) { // Do not update the alloc_tensor op that we just created. - if (use->getOwner() == copy->getDefiningOp()) + if (copy->getDefiningOp() + .getRegion() + .findAncestorOpInRegion(*use->getOwner())) continue; // tensor.dim ops may have been created to be used as alloc_tensor op // dynamic extents. Do not update these either. 
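Because the `copy` value no longer carries the dynamic extents, callers that previously relied on it now have to materialize the dynamic sizes themselves. A hedged sketch of that pattern, mirroring the `tensor.dim` construction added to TensorCopyInsertion.cpp below (the helper name `materializeDynamicSizes` is made up for illustration):

```cpp
// Sketch: collect one index value per dynamic dimension of `tensor`, so the
// values can be passed as `dynamicSizes` to the new AllocTensorOp builder.
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

static SmallVector<Value> materializeDynamicSizes(OpBuilder &b, Location loc,
                                                  Value tensor) {
  SmallVector<Value> dynamicSizes;
  auto type = tensor.getType().cast<RankedTensorType>();
  for (int64_t i = 0, e = type.getRank(); i < e; ++i)
    if (type.isDynamicDim(i))
      dynamicSizes.push_back(b.create<tensor::DimOp>(
          loc, tensor, b.create<arith::ConstantIndexOp>(loc, i)));
  return dynamicSizes;
}
```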
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -153,6 +153,7 @@ const BufferizationOptions &options) { OpBuilder::InsertionGuard g(rewriter); Location loc = getLoc(); + YieldOp yieldOp = getTerminator(); // Nothing to do for dead AllocTensorOps. if (getOperation()->getUses().empty()) { @@ -160,33 +161,40 @@ return success(); } - // Get "copy" buffer. - Value copyBuffer; - if (getCopy()) { - FailureOr maybeCopyBuffer = getBuffer(rewriter, getCopy(), options); - if (failed(maybeCopyBuffer)) - return failure(); - copyBuffer = *maybeCopyBuffer; - } - // Create memory allocation. auto allocType = bufferization::getBufferType(getResult(), options); if (failed(allocType)) return failure(); SmallVector dynamicDims = getDynamicSizes(); - if (getCopy()) { - assert(dynamicDims.empty() && "expected either `copy` or `dynamicDims`"); - populateDynamicDimSizes(rewriter, loc, copyBuffer, dynamicDims); - } FailureOr alloc = options.createAlloc( rewriter, loc, allocType->cast(), dynamicDims); if (failed(alloc)) return failure(); - // Create memory copy (if any). - if (getCopy()) { - if (failed(options.createMemCpy(rewriter, loc, copyBuffer, *alloc))) + if (hasInitializer()) { + // Bufferize terminator to memcpy. + Operation *yieldedDef = yieldOp.getTensor().getDefiningOp(); + if (yieldedDef && getRegion().findAncestorOpInRegion(*yieldedDef)) { + rewriter.setInsertionPointAfter(yieldedDef); + } else { + rewriter.setInsertionPoint(yieldOp); + } + auto cpySrcBuffer = getBuffer(rewriter, yieldOp.getTensor(), options); + if (failed(cpySrcBuffer)) + return failure(); + if (failed(options.createMemCpy(rewriter, yieldOp->getLoc(), *cpySrcBuffer, + *alloc))) return failure(); + rewriter.eraseOp(yieldOp); + + // Move region after the alloc. + rewriter.setInsertionPointAfter(getOperation()); + if (!getBlockArgument().getUses().empty()) { + Value bbargReplacement = rewriter.create(loc, *alloc); + rewriter.replaceAllUsesWith(getBlockArgument(), bbargReplacement); + } + getOperation()->getBlock()->getOperations().splice( + getOperation()->getIterator(), getRegion().front().getOperations()); } // Should the buffer be deallocated? @@ -208,46 +216,25 @@ bool AllocTensorOp::resultBufferizesToMemoryWrite(OpResult opResult, const AnalysisState &state) { - // AllocTensorOps do not write unless they have a `copy` value. - return static_cast(getCopy()); -} - -bool AllocTensorOp::bufferizesToMemoryRead(OpOperand &opOperand, - const AnalysisState &state) { - assert(opOperand.getOperandNumber() == getNumOperands() - 1 && - "expected copy operand"); - return true; -} - -bool AllocTensorOp::bufferizesToMemoryWrite(OpOperand &opOperand, - const AnalysisState &state) { - assert(opOperand.getOperandNumber() == getNumOperands() - 1 && - "expected copy operand"); - return false; -} - -AliasingOpResultList -AllocTensorOp::getAliasingOpResults(OpOperand &opOperand, - const AnalysisState &state) { - // This is a new allocation. It does not alias with any other buffer. - return {}; + // AllocTensorOps do not write unless they have an initializer. 
+ return hasInitializer(); } FailureOr AllocTensorOp::getBufferType( Value value, const BufferizationOptions &options, const DenseMap &fixedTypes) { - assert(value == getResult() && "invalid value"); + assert(getOwnerOfValue(value) == getOperation() && "invalid value"); // Compute memory space of this allocation. Attribute memorySpace; if (getMemorySpace().has_value()) { memorySpace = *getMemorySpace(); - } else if (getCopy()) { - auto copyBufferType = - bufferization::getBufferType(getCopy(), options, fixedTypes); - if (failed(copyBufferType)) + } else if (hasCopyInitializer() && !value.isa()) { + FailureOr yieldedType = bufferization::getBufferType( + getTerminator().getTensor(), options, fixedTypes); + if (failed(yieldedType)) return failure(); - memorySpace = copyBufferType->getMemorySpace(); + memorySpace = yieldedType->getMemorySpace(); } else if (options.defaultMemorySpace.has_value()) { memorySpace = *options.defaultMemorySpace; } else { @@ -258,14 +245,10 @@ } LogicalResult AllocTensorOp::verify() { - if (getCopy() && !getDynamicSizes().empty()) - return emitError("dynamic sizes not needed when copying a tensor"); - if (!getCopy() && getType().getNumDynamicDims() != - static_cast(getDynamicSizes().size())) + if (getType().getNumDynamicDims() != + static_cast(getDynamicSizes().size())) return emitError("expected ") << getType().getNumDynamicDims() << " dynamic sizes"; - if (getCopy() && getCopy().getType() != getType()) - return emitError("expected that `copy` and return type match"); // For sparse tensor allocation, we require that none of its // uses escapes the function boundary directly. @@ -276,28 +259,44 @@ return emitError("sparse tensor allocation should not escape function"); } - return success(); -} + if (getRegion().getNumArguments() != 1) + return emitError("expected 1 block argument, found ") + << getRegion().getNumArguments(); -void AllocTensorOp::build(OpBuilder &builder, OperationState &result, - RankedTensorType type, ValueRange dynamicSizes) { - build(builder, result, type, dynamicSizes, /*copy=*/Value(), - /*size_hint=*/Value(), - /*memory_space=*/IntegerAttr()); + if (getRegion().getArgument(0).getType() != getType()) + return emitError("expected ") << getType() << " block argument"; + + return success(); } void AllocTensorOp::build(OpBuilder &builder, OperationState &result, RankedTensorType type, ValueRange dynamicSizes, - Value copy) { - build(builder, result, type, dynamicSizes, copy, /*size_hint=*/Value(), - /*memory_space=*/IntegerAttr()); + Attribute memorySpace) { + build(builder, result, type, dynamicSizes, /*size_hint=*/Value(), memorySpace, + /*copy=*/Value()); } void AllocTensorOp::build(OpBuilder &builder, OperationState &result, - TensorType type, ValueRange dynamicSizes, Value copy, - IntegerAttr memorySpace) { - build(builder, result, type, dynamicSizes, copy, /*size_hint=*/Value(), - memorySpace); + RankedTensorType type, ValueRange dynamicSizes, + Value sizeHint, Attribute memorySpace, Value copy) { + OpBuilder::InsertionGuard g(builder); + assert(type.getNumDynamicDims() == + static_cast(dynamicSizes.size()) && + "invalid number of dynamic dims"); + result.addOperands(dynamicSizes); + if (sizeHint) + result.addOperands(sizeHint); + result.addAttribute( + getOperandSegmentSizesAttrName(result.name), + builder.getDenseI32ArrayAttr( + {static_cast(dynamicSizes.size()), (sizeHint ? 
1 : 0)})); + if (memorySpace) { + result.addAttribute(getMemorySpaceAttrName(result.name), memorySpace); + } + result.addTypes(type); + Region *r = result.addRegion(); + Block *block = builder.createBlock(r, r->begin(), type, result.location); + builder.create(result.location, copy ? copy : block->getArgument(0)); } namespace { @@ -317,7 +316,7 @@ LogicalResult matchAndRewrite(AllocTensorOp op, PatternRewriter &rewriter) const override { - if (op.getCopy()) + if (op.hasInitializer()) return failure(); SmallVector newShape = llvm::to_vector(op.getType().getShape()); SmallVector newDynamicSizes; @@ -338,7 +337,9 @@ if (newType == op.getType()) return failure(); auto newOp = rewriter.create( - op.getLoc(), newType, newDynamicSizes, /*copy=*/Value()); + op.getLoc(), newType, newDynamicSizes, /*sizeHint=*/op.getSizeHint(), + /*memorySpace=*/op.getMemorySpace().has_value() ? *op.getMemorySpace() + : Attribute()); rewriter.replaceOpWithNewOp(op, op.getType(), newOp); return success(); } @@ -373,63 +374,100 @@ llvm::seq(0, getType().getRank()), [&](int64_t dim) -> Value { if (isDynamicDim(dim)) return getDynamicSize(builder, dim); - return builder.create(getLoc(), - getStaticSize(dim)); + return builder.create( + getLoc(), getType().getDimSize(dim)); })); reifiedReturnShapes.emplace_back(std::move(shapes)); return success(); } ParseResult AllocTensorOp::parse(OpAsmParser &parser, OperationState &result) { - SmallVector dynamicSizesOperands; - if (parser.parseLParen() || parser.parseOperandList(dynamicSizesOperands) || - parser.parseRParen()) + Type indexType = parser.getBuilder().getIndexType(); + + // Parse dynamic sizes. + if (parser.parseLParen()) return failure(); + SmallVector dynamicSizesOperands; + SMLoc dynamicSizesLoc = parser.getCurrentLocation(); + if (parser.parseOperandList(dynamicSizesOperands) || parser.parseRParen() || + parser.resolveOperands(dynamicSizesOperands, indexType, dynamicSizesLoc, + result.operands)) + return failure(); + + // Parse optional copy operand. ParseResult copyKeyword = parser.parseOptionalKeyword("copy"); OpAsmParser::UnresolvedOperand copyOperand; if (copyKeyword.succeeded()) if (parser.parseLParen() || parser.parseOperand(copyOperand) || parser.parseRParen()) return failure(); + + // Parse optional size_hint operand. ParseResult sizeHintKeyword = parser.parseOptionalKeyword("size_hint"); OpAsmParser::UnresolvedOperand sizeHintOperand; if (sizeHintKeyword.succeeded()) - if (parser.parseEqual() || parser.parseOperand(sizeHintOperand)) + if (parser.parseLParen() || parser.parseOperand(sizeHintOperand) || + parser.parseRParen() || + parser.resolveOperand(sizeHintOperand, indexType, result.operands)) return failure(); - if (parser.parseOptionalAttrDict(result.attributes) || parser.parseColon()) + + // Parse attributes. + if (parser.parseOptionalAttrDict(result.attributes)) return failure(); + // Parse region. + Region *region = result.addRegion(); + ParseResult initKeyword; + if (copyKeyword.failed()) { + initKeyword = parser.parseOptionalKeyword("init"); + if (initKeyword.succeeded()) + if (parser.parseRegion(*region)) + return failure(); + } + + // Parse result type. 
TensorType type; - if (parser.parseCustomTypeWithFallback(type)) + if (parser.parseColon() || parser.parseCustomTypeWithFallback(type)) return failure(); result.addTypes(type); - Type indexType = parser.getBuilder().getIndexType(); - if (parser.resolveOperands(dynamicSizesOperands, indexType, result.operands)) - return failure(); - if (copyKeyword.succeeded()) - if (parser.resolveOperand(copyOperand, type, result.operands)) - return failure(); - if (sizeHintKeyword.succeeded()) - if (parser.resolveOperand(sizeHintOperand, indexType, result.operands)) - return failure(); - result.addAttribute(AllocTensorOp::getOperandSegmentSizeAttr(), + // Create region if none was parsed. + if (copyKeyword.succeeded() || initKeyword.failed()) { + OpBuilder b(parser.getContext()); + Block *block = + b.createBlock(region, region->begin(), type, result.location); + SmallVector copyValue; + if (copyKeyword.succeeded()) { + if (parser.resolveOperand(copyOperand, type, copyValue)) + return failure(); + } else { + copyValue.push_back(block->getArgument(0)); + } + b.create(result.location, copyValue[0]); + } + + // Add operand_segment_sizes. + result.addAttribute("operand_segment_sizes", parser.getBuilder().getDenseI32ArrayAttr( {static_cast(dynamicSizesOperands.size()), - static_cast(copyKeyword.succeeded()), static_cast(sizeHintKeyword.succeeded())})); return success(); } void AllocTensorOp::print(OpAsmPrinter &p) { p << "(" << getDynamicSizes() << ")"; - if (getCopy()) - p << " copy(" << getCopy() << ")"; + if (hasCopyInitializer()) + p << " copy(" << getTerminator().getTensor() << ")"; if (getSizeHint()) - p << " size_hint=" << getSizeHint(); + p << " size_hint(" << getSizeHint() << ")"; + if (hasInitializer() && !hasCopyInitializer()) { + p << " init "; + p.printRegion(getRegion()); + } p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{ AllocTensorOp::getOperandSegmentSizeAttr()}); - p << " : "; + p << ' ' << ":"; + p << ' '; auto type = getResult().getType(); if (auto validType = type.dyn_cast<::mlir::TensorType>()) p.printStrippedAttrOrType(validType); @@ -439,11 +477,22 @@ Value AllocTensorOp::getDynamicSize(OpBuilder &b, unsigned idx) { assert(isDynamicDim(idx) && "expected dynamic dim"); - if (getCopy()) - return b.create(getLoc(), getCopy(), idx); return getOperand(getIndexOfDynamicSize(idx)); } +YieldOp AllocTensorOp::getTerminator() { + return cast(getRegion().begin()->getTerminator()); +} + +bool AllocTensorOp::hasInitializer() { + return getTerminator().getTensor() != getBlockArgument(); +} + +bool AllocTensorOp::hasCopyInitializer() { + return hasInitializer() && + &getRegion().front().front() == getTerminator().getOperation(); +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -357,9 +357,12 @@ protected: void notifyOperationRemoved(Operation *op) override { IRRewriter::notifyOperationRemoved(op); - erasedOps.insert(op); - // Erase if present. - toMemrefOps.erase(op); + // TODO: Remove this walk once we get notifications for nested removals. + op->walk([&](Operation *op) { + erasedOps.insert(op); + // Erase if present. 
+ toMemrefOps.erase(op); + }); } void notifyOperationInserted(Operation *op) override { diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt @@ -21,6 +21,7 @@ MLIRBufferizationEnumsIncGen LINK_LIBS PUBLIC + MLIRArithDialect MLIRBufferizationDialect MLIRControlFlowInterfaces MLIRFuncDialect diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Bufferization/Transforms/Passes.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" @@ -15,6 +16,7 @@ #include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h" #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" namespace mlir { namespace bufferization { @@ -84,10 +86,16 @@ // Insert a tensor copy and replace all uses inside of repetitive regions. rewriter.setInsertionPoint(bufferizableOp); + Location loc = bufferizableOp->getLoc(); + auto tensorType = operand.getType().cast(); + SmallVector dynamicDims; + for (int64_t i = 0; i < tensorType.getRank(); ++i) + if (tensorType.isDynamicDim(i)) + dynamicDims.push_back(rewriter.create( + loc, operand, rewriter.create(loc, i))); auto tensorCopy = rewriter.create( - bufferizableOp->getLoc(), operand.getType().cast(), - /*dynamicSizes=*/ValueRange(), - /*copy=*/operand, /*memory_space=*/IntegerAttr()); + loc, tensorType, dynamicDims, /*sizeHint=*/Value(), + /*memory_space=*/Attribute(), /*copy=*/operand); for (OpOperand *use : usesInsideRegion) use->set(tensorCopy); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -736,7 +736,7 @@ const auto resType = getSparseTensorType(op); if (!resType.hasEncoding()) return failure(); - if (op.getCopy()) + if (op.hasInitializer()) return rewriter.notifyMatchFailure(op, "tensor copy not implemented"); // Construct allocation for each field. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -833,7 +833,7 @@ LogicalResult matchAndRewrite(bufferization::AllocTensorOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - if (op.getCopy()) + if (op.hasInitializer()) return rewriter.notifyMatchFailure(op, "sparse tensor copy not implemented"); Location loc = op.getLoc(); @@ -849,7 +849,7 @@ dimSizes.push_back( stt.isDynamicDim(d) ? 
adaptor.getOperands()[operandCtr++] - : constantIndex(rewriter, loc, op.getStaticSize(d))); + : constantIndex(rewriter, loc, op.getType().getDimSize(d))); } // Generate the call to construct empty tensor. The sizes are // explicitly defined by the arguments to the alloc operator. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -53,10 +53,9 @@ Value val = op->get(); // Check allocation, with zero alloc when required. if (auto alloc = val.getDefiningOp()) { - Value copy = alloc.getCopy(); - if (isZero) - return copy && isZeroValue(copy); - return !copy; + return isZero ? alloc.hasInitializer() && + isZeroValue(alloc.getTerminator().getTensor()) + : !alloc.hasInitializer(); } // Last resort for zero alloc: the whole value is zero. return isZero && isZeroValue(val); @@ -232,7 +231,9 @@ Value zero = constantZero(rewriter, op.getLoc(), op.getResult(0).getType()); AllocTensorOp a = op.getDpsInitOperand(0)->get().getDefiningOp(); - rewriter.updateRootInPlace(a, [&]() { a.getCopyMutable().assign(zero); }); + rewriter.updateRootInPlace(a.getTerminator(), [&]() { + a.getTerminator().getTensorMutable().assign(zero); + }); rewriter.replaceOp(op, op.getDpsInitOperand(0)->get()); return success(); } @@ -328,10 +329,13 @@ Value init = prod.getDpsInitOperand(0) ->get() .getDefiningOp() - .getCopy(); + .getTerminator() + .getTensor(); AllocTensorOp a = op.getDpsInitOperand(0)->get().getDefiningOp(); - rewriter.updateRootInPlace(a, [&]() { a.getCopyMutable().assign(init); }); + rewriter.updateRootInPlace(a.getTerminator(), [&]() { + a.getTerminator().getTensorMutable().assign(init); + }); } // Replace consumer with fused operation. Old producer // and consumer ops will be removed by DCE. @@ -390,11 +394,11 @@ // %t = sparse_tensor.cast %tmp Value nnz = rewriter.create(loc, srcTensor); RankedTensorType cooTp = getUnorderedCOOFromType(dstTp); - Value cooBuffer = - rewriter - .create(loc, cooTp, dstDynSizes, Value(), - /*sizeHint=*/nnz, Attribute()) - .getResult(); + Value cooBuffer = rewriter + .create(loc, cooTp, dstDynSizes, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) + .getResult(); ForeachOp foreachOp = rewriter.create( loc, srcTensor, cooBuffer, @@ -792,8 +796,9 @@ // Ensure that mutating `srcRTT` didn't invalidate `dimRank`. 
assert(static_cast(srcRTT.getRank()) == dimRank); tmpCoo = rewriter - .create(loc, srcRTT, dynSrcSizes, Value(), - /*sizeHint=*/nnz, Attribute()) + .create(loc, srcRTT, dynSrcSizes, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) .getResult(); auto foreachOp = rewriter.create( loc, src, tmpCoo, @@ -853,8 +858,9 @@ getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = rewriter .create(loc, dstTp.getRankedTensorType(), - dynDstSizes, Value(), - /*sizeHint=*/nnz, Attribute()) + dynDstSizes, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) .getResult(); SmallVector indices(dstLvlRank); auto foreachOp = rewriter.create( @@ -1044,11 +1050,11 @@ .getResult(0); RankedTensorType cooTp = getUnorderedCOOFromTypeWithOrdering(dstTp, dstTp.getDimToLvlMap()); - Value cooBuffer = - rewriter - .create(loc, cooTp, dynSizesArray, Value(), - /*sizeHint=*/nnz, Attribute()) - .getResult(); + Value cooBuffer = rewriter + .create(loc, cooTp, dynSizesArray, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) + .getResult(); Type eltTp = dstTp.getElementType(); Value value = genAllocaScalar(rewriter, loc, eltTp); diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py --- a/mlir/python/mlir/dialects/_bufferization_ops_ext.py +++ b/mlir/python/mlir/dialects/_bufferization_ops_ext.py @@ -4,10 +4,11 @@ try: from typing import Sequence, Union + from ..dialects import bufferization from ..ir import * from ._ods_common import get_default_loc_context - from typing import Any, List, Union + from typing import Any, List, Optional, Union except ImportError as e: raise RuntimeError("Error loading imports from extension module") from e @@ -15,24 +16,41 @@ class AllocTensorOp: """Extends the bufferization.alloc_tensor op.""" - def __init__(self, - tensor_type: Type, - dynamic_sizes: Sequence[Value], - copy: Value, - size_hint: Value, - escape: BoolAttr, - *, - loc=None, - ip=None): + def __init__( + self, + tensor_type: Type, + dynamic_sizes: Sequence[Value], + copy: Optional[Value], + size_hint: Value, + escape: BoolAttr, + *, + loc=None, + ip=None + ): """Constructs an `alloc_tensor` with static and/or dynamic sizes.""" context = get_default_loc_context(loc) attributes = {} if escape: attributes["escape"] = escape op = self.build_generic( + regions=1, results=[tensor_type], - operands=[dynamic_sizes, copy, size_hint], + operands=[dynamic_sizes, size_hint], attributes=attributes, loc=loc, - ip=ip) + ip=ip, + ) OpView.__init__(self, op) + + # Create region with terminator. + self.regions[0].blocks.append(tensor_type) + with InsertionPoint(self.regions[0].blocks[0]): + if copy: + bufferization.YieldOp(copy) + else: + bufferization.YieldOp(self.regions[0].blocks[0].arguments[0]) + + @property + def block(self): + """Returns the then block of the operation.""" + return self.regions[0].blocks[0] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -1,12 +1,12 @@ -// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops" -split-input-file | FileCheck %s // Run fuzzer with different seeds. 
-// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null // Run with top-down analysis. -// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS // Test without analysis: Insert a copy on every buffer write. // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops copy-before-write" -split-input-file | FileCheck %s --check-prefix=CHECK-COPY-BEFORE-WRITE @@ -199,3 +199,60 @@ %3 = tensor.extract %0[%pos3] : tensor<100xf32> return %2, %3 : f32, f32 } + +// ----- + +// CHECK-LABEL: @alloc_tensor_with_init( +func.func @alloc_tensor_with_init(%sz: index, %f: f32) { + // CHECK: %[[alloc:.*]] = memref.alloc{{.*}} : memref + // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref) + %0 = bufferization.alloc_tensor(%sz) { memory_space = 4 } init { + ^bb0(%arg0: tensor): + %1 = linalg.fill ins(%f : f32) outs(%arg0 : tensor) -> tensor + bufferization.yield %1 : tensor + } : tensor + // CHECK: %[[t:.*]] = bufferization.to_tensor %[[alloc]] + // CHECK: "dummy.some_use"(%[[t]]) + "dummy.some_use"(%0) : (tensor) -> () + return +} + +// ----- + +// CHECK-LABEL: @alloc_tensor_with_init_conflict( +func.func @alloc_tensor_with_init_conflict( + %t: tensor<10xf32>, %sz: index, %f: f32) { + // CHECK: %[[alloc:.*]] = memref.alloc{{.*}} : memref<10xf32, 4> + + // A second alloc is needed because the linalg.fill bufferizes out-of-place. 
+ // CHECK: %[[alloc2:.*]] = memref.alloc{{.*}} : memref<10xf32> + // CHECK: linalg.fill {{.*}} outs(%[[alloc2]] : memref<10xf32>) + + // CHECK: memref.copy %[[alloc2]], %[[alloc]] + // CHECK: memref.dealloc %[[alloc2]] + %0 = bufferization.alloc_tensor() { memory_space = 4 } init { + ^bb0(%arg0: tensor<10xf32>): + %1 = linalg.fill ins(%f : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32> + bufferization.yield %1 : tensor<10xf32> + } : tensor<10xf32> + + // CHECK: %[[t:.*]] = bufferization.to_tensor %[[alloc]] + // CHECK: "dummy.some_use"(%[[t]]) + "dummy.some_use"(%0) : (tensor<10xf32>) -> () + return +} + +// ----- + +// CHECK-LABEL: func @alloc_tensor_0d( +// CHECK-SAME: %[[t:.*]]: tensor +func.func @alloc_tensor_0d(%t: tensor) -> f32 { + // CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] + // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref + // CHECK: memref.copy %[[m]], %[[alloc]] + %0 = bufferization.alloc_tensor() copy(%t) : tensor + // CHECK: %[[r:.*]] = memref.load %[[alloc]][] + %1 = tensor.extract %0[] : tensor + // CHECK: return %[[r]] + return %1 : f32 +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir @@ -9,9 +9,10 @@ func.func @read_after_write_conflict(%t: tensor, %idx: index, %f: f32) -> (tensor, tensor) { - // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {bufferization.escape = [false]} : tensor - // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor - // CHECK-NO-DEALLOC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor + // CHECK: %[[dim:.*]] = tensor.dim %[[t]] + // CHECK: %[[copy:.*]] = bufferization.alloc_tensor(%[[dim]]) copy(%[[t]]) {bufferization.escape = [false]} : tensor + // CHECK-FUNC: bufferization.alloc_tensor({{.*}}) copy(%{{.*}}) {bufferization.escape = [true]} : tensor + // CHECK-NO-DEALLOC: bufferization.alloc_tensor({{.*}}) copy(%{{.*}}) {bufferization.escape = [true]} : tensor // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]] %0 = tensor.insert %f into %t[%idx] : tensor // CHECK: return %[[insert]], %[[t]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --test-transform-dialect-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt --test-transform-dialect-interpreter %s -cse -split-input-file -verify-diagnostics | FileCheck %s // Test One-Shot Bufferize. 
diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir --- a/mlir/test/Dialect/Bufferization/invalid.mlir +++ b/mlir/test/Dialect/Bufferization/invalid.mlir @@ -19,8 +19,8 @@ // ----- func.func @alloc_tensor_copy_and_dims(%t: tensor, %sz: index) { - // expected-error @+1{{dynamic sizes not needed when copying a tensor}} - %0 = bufferization.alloc_tensor(%sz) copy(%t) : tensor + // expected-error @+1{{expected 1 dynamic sizes}} + %0 = bufferization.alloc_tensor() copy(%t) : tensor return } @@ -81,6 +81,28 @@ // ----- +func.func @alloc_tensor_type_mismatch(%sz: index) { + // expected-error @+1{{expected 'tensor' block argument}} + %0 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor<5xf32>): + bufferization.yield %arg0 : tensor<5xf32> + } : tensor + return +} + +// ----- + +func.func @alloc_tensor_invalid_num_bbargs(%sz: index) { + // expected-error @+1{{expected 1 block argument, found 2}} + %0 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor, %arg1: tensor): + bufferization.yield %arg0 : tensor + } : tensor + return +} + +// ----- + // expected-error @+1{{invalid value for 'bufferization.access'}} func.func private @invalid_buffer_access_type(tensor<*xf32> {bufferization.access = "foo"}) diff --git a/mlir/test/Dialect/Bufferization/ops.mlir b/mlir/test/Dialect/Bufferization/ops.mlir --- a/mlir/test/Dialect/Bufferization/ops.mlir +++ b/mlir/test/Dialect/Bufferization/ops.mlir @@ -27,28 +27,58 @@ return %tensor : tensor<2xf32> } -// CHECK-LABEL: func @test_alloc_tensor_op -func.func @test_alloc_tensor_op(%t: tensor, %sz: index) - -> tensor -{ +// CHECK-LABEL: func @test_alloc_tensor_op( +// CHECK-SAME: %[[t:.*]]: tensor, %[[sz:.*]]: index +func.func @test_alloc_tensor_op(%t: tensor, %sz: index, %f: f32) { // CHECK: bufferization.alloc_tensor(%{{.*}}) : tensor %0 = bufferization.alloc_tensor(%sz) : tensor - // CHECK: bufferization.alloc_tensor() copy(%{{.*}}) : tensor - %1 = bufferization.alloc_tensor() copy(%t) : tensor + + // CHECK: bufferization.alloc_tensor(%{{.*}}) copy(%{{.*}}) : tensor + %1 = bufferization.alloc_tensor(%sz) copy(%t) : tensor + // CHECK: bufferization.alloc_tensor() : tensor<5x6xf32> %2 = bufferization.alloc_tensor() : tensor<5x6xf32> + // CHECK: bufferization.alloc_tensor(%{{.*}}, %{{.*}}) : tensor %3 = bufferization.alloc_tensor(%sz, %sz) : tensor - // CHECK: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor - %4 = bufferization.alloc_tensor() copy(%t) {escape = true} : tensor - // CHECK: bufferization.alloc_tensor() copy(%{{.*}}) {escape = false} : tensor - %5 = bufferization.alloc_tensor() copy(%t) {escape = false} : tensor + + // CHECK: bufferization.alloc_tensor(%{{.*}}) copy(%{{.*}}) {escape = true} : tensor + %4 = bufferization.alloc_tensor(%sz) copy(%t) {escape = true} : tensor + + // CHECK: bufferization.alloc_tensor(%{{.*}}) copy(%{{.*}}) {escape = false} : tensor + %5 = bufferization.alloc_tensor(%sz) copy(%t) {escape = false} : tensor + + // CHECK: bufferization.alloc_tensor() size_hint( %c100 = arith.constant 100 : index - // CHECK: bufferization.alloc_tensor() size_hint= - %6 = bufferization.alloc_tensor() size_hint=%c100 : tensor<100x100xf64, #CSR> + %6 = bufferization.alloc_tensor() size_hint(%c100) : tensor<100x100xf64, #CSR> + // CHECK: bufferization.alloc_tensor(%{{.+}}) {memory_space = "foo"} : tensor %7 = bufferization.alloc_tensor(%sz) {memory_space = "foo"} : tensor - return %1 : tensor + + // CHECK: bufferization.alloc_tensor(%[[sz]]) : tensor + %8 = 
bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + bufferization.yield %arg0 : tensor + } : tensor + + // CHECK: bufferization.alloc_tensor(%[[sz]]) copy(%[[t]]) : tensor + %9 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + bufferization.yield %t : tensor + } : tensor + + // CHECK: bufferization.alloc_tensor(%[[sz]]) init { + // CHECK: ^{{.*}}(%[[bbarg:.*]]: tensor): + // CHECK: %[[filled:.*]] = linalg.fill {{.*}} outs(%[[bbarg]] : tensor) + // CHECK: bufferization.yield %[[filled]] + // CHECK: } + %10 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + %filled = linalg.fill ins(%f : f32) outs(%arg0 : tensor) -> tensor + bufferization.yield %filled : tensor + } : tensor + + return } // CHECK-LABEL: func @test_dealloc_tensor_op diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir @@ -7,8 +7,8 @@ %lb : index, %ub : index, %step : index) -> (tensor, tensor) { - // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor - // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[A]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[B]]) {bufferization.escape = [false]} : tensor // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]]) %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) -> (tensor, tensor) @@ -28,15 +28,15 @@ %lb : index, %ub : index, %step : index) -> (tensor, tensor) { - // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor - // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[A]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[B]]) {bufferization.escape = [false]} : tensor // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]]) %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) -> (tensor, tensor) { // Yield tensors in different order. 
- // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[iter2]]) {bufferization.escape = [true]} : tensor - // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor() copy(%[[iter1]]) {bufferization.escape = [true]} : tensor + // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[iter2]]) {bufferization.escape = [true]} : tensor + // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[iter1]]) {bufferization.escape = [true]} : tensor // CHECK: scf.yield %[[yield1]], %[[yield2]] scf.yield %tB, %tA : tensor, tensor } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -371,7 +371,7 @@ // CHECK: %[[A2:.*]] = memref.alloc(%[[M2]]) : memref // CHECK: %[[A3:.*]] = memref.alloc(%[[HINT]]) : memref func.func @sparse_alloc_coo_with_size_hint(%arg0: index) -> tensor<10x20xf64, #Coo> { - %0 = bufferization.alloc_tensor() size_hint=%arg0 : tensor<10x20xf64, #Coo> + %0 = bufferization.alloc_tensor() size_hint(%arg0) : tensor<10x20xf64, #Coo> %1 = sparse_tensor.load %0 : tensor<10x20xf64, #Coo> return %1 : tensor<10x20xf64, #Coo> } diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ -21,7 +21,7 @@ // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) -// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]] +// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint(%[[N]]) // CHECK: %[[VB:.*]] = memref.alloca() // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) @@ -52,7 +52,7 @@ // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) -// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]] +// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint(%[[N]]) // CHECK: %[[VB:.*]] = memref.alloca() // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -267,9 +267,11 @@ // CHECK-SAME: %[[t:.*]]: memref> func.func @pad_memory_space(%t: tensor, %h1: index, %f: f32, %pos: index) -> f32 { + %c0 = arith.constant 0 : index + %d = tensor.dim %t, %c0 : tensor // CHECK: %[[alloc_tensor:.*]] = memref.alloc{{.*}} : memref // CHECK: memref.copy %[[t]], %[[alloc_tensor]] - %0 = bufferization.alloc_tensor() copy(%t) + %0 = bufferization.alloc_tensor(%d) copy(%t) {memory_space = 3 : i64} : tensor // CHECK: %[[padded_alloc:.*]] = memref.alloc() {{.*}} : memref<15xf32, 3> // CHECK: linalg.map