diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
@@ -415,6 +415,29 @@
         return mlir::bufferization::detail::defaultIsRepetitiveRegion(
             cast<BufferizableOpInterface>($_op.getOperation()), index);
       }]
+    >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return `true` if allocations are allowed inside the given region of
+        this op. By default, allocations are allowed.
+
+        This method is queried during TensorCopyInsertion. If an allocation
+        is attempted to be inserted in a region that does not allow
+        allocations, it is instead inserted in the parent region.
+
+        Note: This method should be overridden only if setting the insertion
+        point to the parent region is generally safe. In particular, changing
+        the insertion point is not safe if the dynamic extents of an
+        allocation depend on an SSA value defined in the region that disallows
+        allocations.
+      }],
+      /*retType=*/"bool",
+      /*methodName=*/"areAllocationsAllowedInRegion",
+      /*args=*/(ins "unsigned":$index),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        return true;
+      }]
     >
   ];
 
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -96,12 +96,80 @@
   return !attr[opResult.getResultNumber()].cast<BoolAttr>().getValue();
 }
 
+/// Compute and set an allocation point for a tensor copy of the given shaped
+/// value. This function queries the BufferizableOpInterface to detect regions
+/// in which allocations are forbidden. In such a case, an allocation is placed
+/// in a parent region. Example:
+///
+/// vector.mask ... {
+///   vector.transfer_write %v[%c0], %t : vector<5xf32>, tensor<?xf32>
+/// } : ... -> tensor<?xf32>
+///
+/// In case %t bufferizes out-of-place, the allocation must be placed outside
+/// of vector.mask as per the op's BufferizableOpInterface implementation.
+///
+/// Note: Allocations may not jump repetitive regions. If an allocation would be
+/// placed in a different repetitive region, return failure. This indicates an
+/// incorrect implementation of the BufferizableOpInterface.
+///
+/// Note: If the new insertion point violates op dominance, return failure. This
+/// also indicates an incorrect implementation of the BufferizableOpInterface.
+///
+static LogicalResult
+setAllocationInsertionPoint(OpBuilder &b, Value shapedValue,
+                            const BufferizationOptions &options) {
+  Region *r = b.getInsertionBlock()->getParent();
+  Region *repetitiveRegion =
+      getEnclosingRepetitiveRegion(b.getInsertionBlock(), options);
+  Operation *ip = nullptr;
+  do {
+    Operation *op = r->getParentOp();
+    if (!op)
+      return success();
+    auto bufferizableOp = options.dynCastBufferizableOp(op);
+    if (bufferizableOp &&
+        !bufferizableOp.areAllocationsAllowedInRegion(r->getRegionNumber()))
+      ip = op;
+  } while ((r = r->getParentRegion()));
+
+  if (ip) {
+    // A custom insertion point is necessary.
+    if (getEnclosingRepetitiveRegion(ip, options) != repetitiveRegion)
+      // It is incorrect to set the buffer allocation point into a different
+      // repetitive region. This would effectively de-privatize a buffer.
+      return getOwnerOfValue(shapedValue)
+          ->emitError(
+              "unable to move tensor copy ip to different repetitive region");
+
+    // Check for op dominance errors.
+    if (auto bbArg = shapedValue.dyn_cast<BlockArgument>()) {
+      if (!bbArg.getParentBlock()->findAncestorOpInBlock(*ip))
+        // The computed insertion point violates op dominance.
+        return getOwnerOfValue(shapedValue)
+            ->emitError(
+                "unable to find suitable insertion point for tensor copy");
+    } else {
+      Operation *shapedOp = shapedValue.dyn_cast<OpResult>().getDefiningOp();
+      Operation *ipInBlock = shapedOp->getBlock()->findAncestorOpInBlock(*ip);
+      if (!ipInBlock || shapedOp == ipInBlock ||
+          ipInBlock->isBeforeInBlock(shapedOp))
+        // The computed insertion point violates op dominance.
+        return getOwnerOfValue(shapedValue)
+            ->emitError(
+                "unable to find suitable insertion point for tensor copy");
+    }
+    b.setInsertionPoint(ip);
+  }
+  return success();
+}
+
 /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
 /// shaped value is copied. Otherwise, a tensor with undefined contents is
 /// allocated.
 FailureOr<Value> bufferization::allocateTensorForShapedValue(
     OpBuilder &b, Location loc, Value shapedValue, bool escape,
     const BufferizationOptions &options, bool copy) {
+  OpBuilder::InsertionGuard g(b);
   Value tensor;
   if (shapedValue.getType().isa<RankedTensorType>()) {
     tensor = shapedValue;
@@ -137,6 +205,10 @@
     populateDynamicDimSizes(b, loc, tensor, dynamicSizes);
   }
 
+  // Compute insertion point for allocation.
+  if (failed(setAllocationInsertionPoint(b, shapedValue, options)))
+    return failure();
+
   // Create AllocTensorOp.
   auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
                                                copy ? tensor : Value());
diff --git a/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -201,6 +201,10 @@
                                 const AnalysisState &state) const {
     return BufferRelation::Equivalent;
   }
+
+  bool areAllocationsAllowedInRegion(Operation *op, unsigned index) const {
+    return false;
+  }
 };
 
 /// Bufferization of vector.yield. Replaced with a new vector.yield that
diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir
--- a/mlir/test/Dialect/Vector/bufferize.mlir
+++ b/mlir/test/Dialect/Vector/bufferize.mlir
@@ -44,5 +44,20 @@
   return %0 : vector<16xf32>
 }
 
-// TODO: Add test case for vector.mask. The masked op can currently not
-// bufferize out-of-place, so the only test case is in one-shot-bufferize.mlir.
+// -----
+
+// CHECK-LABEL: func @mask(
+// CHECK-SAME: %[[t0:.*]]: tensor<?xf32>, %[[val:.*]]: vector<16xf32>
+// CHECK-SAME: %[[idx:.*]]: index, %[[mask:.*]]: vector<16xi1>)
+// CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t0]]
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK: %[[dim:.*]] = memref.dim %[[m]], %[[c0]]
+// CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
+// CHECK: memref.copy %[[m]], %[[alloc]]
+// CHECK: vector.mask %[[mask]] { vector.transfer_write %[[val]], %[[alloc]][%[[idx]]] : vector<16xf32>, memref<?xf32> } : vector<16xi1>
+// CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] : memref<?xf32>
+// CHECK: return %[[r]]
+func.func @mask(%t0: tensor<?xf32>, %val: vector<16xf32>, %idx: index, %mask: vector<16xi1>) -> tensor<?xf32> {
+  %0 = vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, tensor<?xf32> } : vector<16xi1> -> tensor<?xf32>
+  return %0 : tensor<?xf32>
+}