diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
@@ -25,14 +25,39 @@
 // TODO: These ops should implement BufferizableOpInterface directly when moved
 // to the Bufferization dialect.
 
-// TODO: These implementations are conservative and will likely have to be
-// loosened for partial bufferization.
-
 /// ToMemrefOp casts a tensor into a memref. The resulting memref is the memory
-/// location of the incoming tensor once it will be bufferized. In the anlysis,
-/// the incoming tensor is assumed to bufferize to a memory read and to an
-/// inplace memory write, since it is unknown what will happen to the resulting
-/// memref.
+/// location of the incoming tensor once it has been bufferized.
+///
+/// ToMemrefOps are generated during partial bufferization when passing the
+/// result of a not-yet-bufferized op into a bufferized op.
+///
+/// As an example, consider the following IR:
+///
+/// %t1 = "writing_op"(%t0) : tensor<?xf32>
+/// %t2 = "another_writing_op"(%t1) : tensor<?xf32>
+///
+/// In this example, the first op is unknown. Therefore, the analysis
+/// conservatively considers its result as not writable, preventing the second
+/// op from bufferizing inplace.
+///
+/// %t1 = "writing_op"(%t0) : tensor<?xf32>
+/// %t2 = "another_writing_op"(%t1) { inplace = [false] } : tensor<?xf32>
+///
+/// During bufferization, the first op is left as is. Its result is wrapped in
+/// a ToMemrefOp and copied into a new buffer, which the second op then uses.
+///
+/// %t1 = "writing_op"(%t0) : tensor<?xf32>
+/// %t1_memref = bufferization.to_memref %t1
+/// %t1_copy = memref.alloc
+/// memref.copy %t1_memref, %t1_copy
+/// "another_writing_op"(%t1_copy)
+///
+/// From an analysis perspective, ToMemrefOp operands bufferize to a memory
+/// read. They do not bufferize to a memory write. Since the analysis cannot
+/// analyze memref usage, it is unknown how the resulting memref is used. We
+/// can, however, be sure that the memref is not written to inplace, because
+/// buffer copies are inserted together with ToMemrefOps, as can be seen from
+/// the example above.
 struct ToMemrefOpInterface
     : public BufferizableOpInterface::ExternalModel<ToMemrefOpInterface,
                                                     bufferization::ToMemrefOp> {
@@ -52,13 +77,24 @@
 
   LogicalResult bufferize(Operation *op, OpBuilder &b,
                           BufferizationState &state) const {
+    // TODO: Need to insert memref.cast in certain situations?
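+    // (Presumably when the type of the looked-up buffer differs from the
+    // declared result type of the ToMemrefOp, e.g., a more dynamic layout or
+    // shape; this is an assumption and not handled here yet.)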
+    auto toMemrefOp = cast<bufferization::ToMemrefOp>(op);
+    if (!state.isMapped(toMemrefOp.tensor()))
+      return success();
+
+    Value memref = state.lookupBuffer(toMemrefOp.tensor());
+    // Do not replace ToMemrefOps that were just created.
+    if (toMemrefOp.getResult() != memref)
+      toMemrefOp.replaceAllUsesWith(memref);
+
     return success();
   }
 };
 
-/// ToTensorOp conceptually loads a tensor from a memory location. Such ops do
-/// not lower any further, and they should have disappeared by the time the
-/// input is fully bufferized.
+/// ToTensorOp conceptually loads a tensor from a memory location. ToTensorOps
+/// are used at bufferization boundaries for partial bufferization. They do not
+/// lower any further, and should have disappeared by the time the input is
+/// fully bufferized.
 ///
 /// The analysis has no information about the memref that is loaded from by the
 /// ToTensorOp. We have to assume that the loaded tensor may after bufferization
@@ -66,6 +102,38 @@
 /// ToMemrefOp have no aliasing OpOperand/OpResult pairs, this cannot be encoded
 /// directly in the analysis. However, declaring ToTensorOp results as not
 /// writable also enforces a buffer copy and has the same effect.
+///
+/// OpOperands (of other ops) whose value is a ToTensorOp result never
+/// bufferize inplace. This leads to additional buffer copies when gradually
+/// bufferizing IR, compared to a one-shot bufferization.
+///
+/// As an example, consider the following IR:
+///
+/// %t1 = "writing_op"(%t0) : tensor<?xf32>
+/// %t2 = "another_writing_op"(%t1) : tensor<?xf32>
+///
+/// When bufferizing the entire IR in one shot, both ops bufferize inplace in
+/// the absence of conflicts:
+///
+/// %t1 = "writing_op"(%t0) : tensor<?xf32> { inplace = [true] }
+/// %t2 = "another_writing_op"(%t1) : tensor<?xf32> { inplace = [true] }
+///
+/// Let's assume that after the first partial bufferization, the IR is in the
+/// following state:
+///
+/// %m = "memref_writing_op"(%m) : memref<?xf32>
+/// %m_tensor = bufferization.to_tensor %m
+/// %t2 = "another_writing_op"(%m_tensor) : tensor<?xf32>
+///
+/// In the above example, another_writing_op cannot bufferize inplace because
+/// ToTensorOp results are not writable.
+///
+/// Note: ToTensorOps are always inserted right before their use, so they are
+/// guaranteed to copy the most recent buffer contents.
+///
+/// It is up to other passes to eliminate such superfluous buffer copies.
+/// Alternatively, these copies are not introduced in the first place when the
+/// entire IR is bufferized in one shot.
 struct ToTensorOpInterface
     : public BufferizableOpInterface::ExternalModel<ToTensorOpInterface,
                                                     bufferization::ToTensorOp> {
@@ -82,7 +150,20 @@
   }
 
   bool isWritable(Operation *op, Value value, BufferizationState &state) const {
-    // It is unknown whether the MemRef operand is writable or not.
+    // ToTensorOps are generated at the partial bufferization boundary. The RaW
+    // analysis cannot analyze through ToTensorOps/ToMemrefOps, so buffer copies
+    // must be inserted whenever writing to a ToTensorOp result. Otherwise, two
+    // conflicting writing ops could bufferize inplace in such a way that the
+    // analysis would not consider them conflicting. E.g.:
+    //
+    // %t1 = "writing_op"(%t0) : tensor<?xf32>
+    // %t1_memref = bufferization.to_memref %t1
+    // %t1_tensor = bufferization.to_tensor %t1_memref
+    // %t2 = "another_writing_op"(%t1_tensor) : tensor<?xf32>
+    //
+    // In the above example, writing_op and another_writing_op could both
+    // bufferize inplace from a RaW analysis perspective, but isWritable = false
+    // forces a buffer copy.
     return false;
   }
 };
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
@@ -229,12 +229,6 @@
 /// Return true if opOperand has been decided to bufferize in-place.
 static bool isInplaceMemoryWrite(OpOperand &opOperand,
                                  const BufferizationAliasInfo &aliasInfo) {
-  // The analysis does not know what happens to the result of a ToMemrefOp, so
-  // we assume that it is written to.
-  // TODO: This is a conservative implementation. This rule will have to be
-  // relaxed for partial bufferization.
-  if (isa<bufferization::ToMemrefOp>(opOperand.getOwner()))
-    return true;
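+  // Note: ToMemrefOp operands no longer count as inplace memory writes here.
+  // Buffer copies are inserted together with ToMemrefOps instead (see
+  // ToMemrefOpInterface in BufferizationInterfaceImpl.cpp).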
   // OpOperands without an aliasing OpResult do not write.
   OpResult opResult = getAliasingOpResult(opOperand);
   if (!opResult)
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
@@ -167,23 +167,3 @@
   }
   return %r: tensor<4xi32>
 }
-
-// -----
-
-func @to_memref_op_is_writing(
-    %t1: tensor<?xf32> {linalg.inplaceable = true}, %idx1: index,
-    %idx2: index, %idx3: index, %v1: vector<5xf32>) -> (vector<5xf32>, vector<5xf32>) {
-  // This is a RaW conflict because to_memref is an inplace write and %t1 is
-  // read further down. This will likely have to change with partial
-  // bufferization.
-
-  // expected-error @+1 {{input IR has RaW conflict}}
-  %0 = bufferization.to_memref %t1 : memref<?xf32>
-
-  // Read from both.
-  %cst = arith.constant 0.0 : f32
-  %r1 = vector.transfer_read %t1[%idx3], %cst : tensor<?xf32>, vector<5xf32>
-  %r2 = vector.transfer_read %0[%idx3], %cst : memref<?xf32>, vector<5xf32>
-
-  return %r1, %r2 : vector<5xf32>, vector<5xf32>
-}
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
@@ -1,13 +1,22 @@
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
 
-// TODO: Bufferize result IR of bufferization.
-// TODO: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
+// Bufferize the result IR of bufferization (i.e., run the pass a second time).
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
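+// Run only the tensor / only the vector partial bufferization pass, and then
+// both passes chained in either order.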
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-tensor-bufferize -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-vector-bufferize -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-VECTOR
+
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-tensor-bufferize -canonicalize | \
+// RUN: mlir-opt -linalg-comprehensive-vector-bufferize -canonicalize | FileCheck %s --check-prefix=CHECK-TENSOR-VECTOR
+
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-vector-bufferize -canonicalize | \
+// RUN: mlir-opt -linalg-comprehensive-tensor-bufferize -canonicalize | FileCheck %s --check-prefix=CHECK-VECTOR-TENSOR
+
 // CHECK-LABEL: func @use_of_unknown_op_1(
 //  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
 func @use_of_unknown_op_1(%t1: tensor<?xf32> {linalg.inplaceable = true})
@@ -95,15 +104,23 @@
 
 // CHECK-LABEL: func @use_of_bufferizable_op_in_unbufferizable_op
 //  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
+// CHECK-TENSOR-LABEL: func @use_of_bufferizable_op_in_unbufferizable_op
+//  CHECK-TENSOR-SAME:     %[[m1:.*]]: tensor<?xf32>
 func @use_of_bufferizable_op_in_unbufferizable_op(
     %t1: tensor<?xf32>, %o: index, %s: index) -> (tensor<?xf32>, tensor<?xf32>) {
   // CHECK: %[[subview:.*]] = memref.subview %[[m1]]
+  // CHECK-TENSOR: %[[m1_memref:.*]] = bufferization.to_memref %[[m1]]
+  // CHECK-TENSOR: %[[subview:.*]] = memref.subview %[[m1_memref]]
   %0 = tensor.extract_slice %t1[%o][%s][1] : tensor<?xf32> to tensor<?xf32>
   // CHECK: %[[subview_tensor:.*]] = bufferization.to_tensor %[[subview]]
   // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[subview_tensor]])
+  // CHECK-TENSOR: %[[subview_tensor:.*]] = bufferization.to_tensor %[[subview]]
+  // CHECK-TENSOR: %[[dummy:.*]] = "test.dummy_op"(%[[subview_tensor]])
   %1 = "test.dummy_op"(%0) : (tensor<?xf32>) -> tensor<?xf32>
   // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
   // CHECK: return %[[subview]], %[[dummy_memref]]
+  // CHECK-TENSOR: %[[subview_tensor:.*]] = bufferization.to_tensor %[[subview]]
+  // CHECK-TENSOR: return %[[subview_tensor]], %[[dummy]]
   return %0, %1 : tensor<?xf32>, tensor<?xf32>
 }
 
@@ -148,3 +165,113 @@
   // CHECK: return %[[alloc]]
   return %1 : tensor<?xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func @write_to_same_tensor_1
+//  CHECK-SAME:     %[[t1:.*]]: memref<?xf32
+func @write_to_same_tensor_1(
+    %t1: tensor<?xf32> {linalg.inplaceable = true}, %idx1: index,
+    %idx2: index, %idx3: index, %v1: vector<5xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
+  // CHECK: %[[t1_tensor:.*]] = bufferization.to_tensor %[[t1]]
+  // Note: bufferization.to_tensor ops are not writable. So dummy_op will not
+  // bufferize inplace.
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1_tensor]])
+  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
+  %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>
+
+  // Write to %t1 and to a slice of %0.
+  // CHECK: vector.transfer_write %{{.*}}, %[[t1]]
+  %r1 = vector.transfer_write %v1, %t1[%idx3] : vector<5xf32>, tensor<?xf32>
+
+  // %0 is not writable. So create a copy here.
+  // CHECK: %[[alloc:.*]] = memref.alloc
+  // CHECK: %[[subview:.*]] = memref.subview %[[dummy_memref]]
+  // CHECK: linalg.copy(%[[subview]], %[[alloc]])
+  %e = tensor.extract_slice %0[%idx1][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+
+  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
+  %r2 = vector.transfer_write %v1, %e[%idx3] : vector<5xf32>, tensor<?xf32>
+
+  // CHECK: return %[[alloc]]
+  return %r1, %r2 : tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// TODO: Check insertion point of to_tensor and to_memref ops.
+
+// CHECK-TENSOR-LABEL: func @writing_op_sequence
+//  CHECK-TENSOR-SAME:     %[[t1:.*]]: tensor<?xf32>
+//   CHECK-TENSOR-NOT:   tensor.insert
+
+// CHECK-VECTOR-LABEL: func @writing_op_sequence
+//  CHECK-VECTOR-SAME:     %[[t1:.*]]: tensor<?xf32>
+//   CHECK-VECTOR-NOT:   vector.transfer_write
+
+// CHECK-TENSOR-VECTOR-LABEL: func @writing_op_sequence
+//  CHECK-TENSOR-VECTOR-SAME:     %[[t1:.*]]: tensor<?xf32>
+//   CHECK-TENSOR-VECTOR-NOT:   tensor.insert
+//   CHECK-TENSOR-VECTOR-NOT:   vector.transfer_write
+
+// CHECK-VECTOR-TENSOR-LABEL: func @writing_op_sequence
+//  CHECK-VECTOR-TENSOR-SAME:     %[[t1:.*]]: tensor<?xf32>
+func @writing_op_sequence(%t1: tensor<?xf32>, %s: f32,
+                          %i: index, %v: vector<5xf32>) -> tensor<?xf32> {
+  // CHECK-TENSOR: %[[t1_memref:.*]] = bufferization.to_memref %[[t1]]
+  // CHECK-TENSOR: %[[alloc:.*]] = memref.alloc
+  // CHECK-TENSOR: %[[casted:.*]] = memref.cast %[[alloc]]
+  // CHECK-TENSOR: memref.copy %[[t1_memref]], %[[casted]]
+  // CHECK-TENSOR: memref.store %{{.*}}, %[[alloc]]
+
+  // CHECK-VECTOR: %[[write1:.*]] = tensor.insert %{{.*}} into %[[t1]]
+
+  // CHECK-TENSOR-VECTOR: %[[t1_memref:.*]] = bufferization.to_memref %[[t1]]
+  // CHECK-TENSOR-VECTOR: %[[alloc:.*]] = memref.alloc
+  // CHECK-TENSOR-VECTOR: %[[casted:.*]] = memref.cast %[[alloc]]
+  // CHECK-TENSOR-VECTOR: %[[alloc2:.*]] = memref.alloc
+  // CHECK-TENSOR-VECTOR: %[[casted2:.*]] = memref.cast %[[alloc2]]
+  // CHECK-TENSOR-VECTOR: memref.copy %[[t1_memref]], %[[casted]]
+  // CHECK-TENSOR-VECTOR: memref.store %{{.*}}, %[[alloc]]
+
+  // CHECK-VECTOR-TENSOR: %[[t1_memref:.*]] = bufferization.to_memref %[[t1]]
+  // CHECK-VECTOR-TENSOR: %[[alloc:.*]] = memref.alloc
+  // CHECK-VECTOR-TENSOR: %[[casted:.*]] = memref.cast %[[alloc]]
+  // CHECK-VECTOR-TENSOR: memref.copy %[[t1_memref]], %[[casted]]
+  // CHECK-VECTOR-TENSOR: memref.store %{{.*}}, %[[alloc]]
+  %write1 = tensor.insert %s into %t1[%i] : tensor<?xf32>
+
+  // CHECK-TENSOR: %[[t1_tensor:.*]] = bufferization.to_tensor %[[casted]]
+  // CHECK-TENSOR: %[[write2:.*]] = vector.transfer_write %{{.*}}, %[[t1_tensor]]
+
+  // CHECK-VECTOR: %[[write1_memref:.*]] = bufferization.to_memref %[[write1]]
+  // CHECK-VECTOR: %[[alloc:.*]] = memref.alloc
+  // CHECK-VECTOR: %[[casted:.*]] = memref.cast %[[alloc]]
+  // CHECK-VECTOR: memref.copy %[[write1_memref]], %[[casted]]
+  // CHECK-VECTOR: vector.transfer_write %{{.*}}, %[[alloc]]
+
+  // CHECK-TENSOR-VECTOR: memref.copy %[[casted]], %[[casted2]]
+  // CHECK-TENSOR-VECTOR: vector.transfer_write %{{.*}}, %[[alloc2]]
+
+  // CHECK-VECTOR-TENSOR: %[[alloc2:.*]] = memref.alloc
+  // CHECK-VECTOR-TENSOR: %[[casted2:.*]] = memref.cast %[[alloc2]]
+  // CHECK-VECTOR-TENSOR: memref.copy %[[casted]], %[[casted2]]
+  // CHECK-VECTOR-TENSOR: vector.transfer_write %{{.*}}, %[[alloc2]]
+  %write2 = vector.transfer_write %v, %write1[%i] : vector<5xf32>, tensor<?xf32>
+
+  // CHECK-TENSOR: return %[[write2]]
+
+  // CHECK-VECTOR: %[[write2_tensor:.*]] = bufferization.to_tensor %[[casted]]
+  // CHECK-VECTOR: return %[[write2_tensor]]
+
+  // CHECK-TENSOR-VECTOR: %[[write2_tensor:.*]] = bufferization.to_tensor %[[casted2]]
+  // CHECK-TENSOR-VECTOR: return %[[write2_tensor]]
+
+  // CHECK-VECTOR-TENSOR: %[[write2_tensor:.*]] = bufferization.to_tensor %[[casted2]]
+  // CHECK-VECTOR-TENSOR: return %[[write2_tensor]]
+  return %write2 : tensor<?xf32>
+}