diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir @@ -70,13 +70,7 @@ %c7 = arith.constant 7 : index // Setup matrix memory that is initialized to zero. - %xdata = memref.alloc() : memref<7x3xf64> - scf.for %i = %c0 to %c7 step %c1 { - scf.for %j = %c0 to %c3 step %c1 { - memref.store %d0, %xdata[%i, %j] : memref<7x3xf64> - } - } - %x = bufferization.to_tensor %xdata : memref<7x3xf64> + %x = arith.constant dense<0.000000e+00> : tensor<7x3xf64> // Read the sparse tensor from file, construct sparse storage. %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename) @@ -96,14 +90,12 @@ // CHECK: ( 0, 0, 0 ) // CHECK: ( 7, 0, 0 ) // - %r = bufferization.to_memref %0 : memref<7x3xf64> scf.for %i = %c0 to %c7 step %c1 { - %v = vector.transfer_read %r[%i, %c0], %d0: memref<7x3xf64>, vector<3xf64> + %v = vector.transfer_read %0[%i, %c0], %d0: tensor<7x3xf64>, vector<3xf64> vector.print %v : vector<3xf64> } // Release the resources. - memref.dealloc %xdata : memref<7x3xf64> bufferization.dealloc_tensor %a : tensor<7x3x3x3x3x3x5x3xf64, #SparseTensor> return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir @@ -44,11 +44,11 @@ // // Kernel that uses index in the index notation (conjunction). 
// - func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> { - %init = linalg.init_tensor [8] : tensor<8xi64> + func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>, + %out: tensor<8xi64>) -> tensor<8xi64> { %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%init: tensor<8xi64>) { + outs(%out: tensor<8xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -61,11 +61,11 @@ // // Kernel that uses index in the index notation (disjunction). // - func.func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> { - %init = linalg.init_tensor [8] : tensor<8xi64> + func.func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>, + %out: tensor<8xi64>) -> tensor<8xi64> { %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%init: tensor<8xi64>) { + outs(%out: tensor<8xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -78,11 +78,11 @@ // // Kernel that uses indices in the index notation (conjunction). // - func.func @sparse_index_2d_conj(%arga: tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> { - %init = linalg.init_tensor [3,4] : tensor<3x4xi64> + func.func @sparse_index_2d_conj(%arga: tensor<3x4xi64, #SparseMatrix>, + %out: tensor<3x4xi64>) -> tensor<3x4xi64> { %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%init: tensor<3x4xi64>) { + outs(%out: tensor<3x4xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -98,11 +98,11 @@ // // Kernel that uses indices in the index notation (disjunction). 
// - func.func @sparse_index_2d_disj(%arga: tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> { - %init = linalg.init_tensor [3,4] : tensor<3x4xi64> + func.func @sparse_index_2d_disj(%arga: tensor<3x4xi64, #SparseMatrix>, + %out: tensor<3x4xi64>) -> tensor<3x4xi64> { %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%init: tensor<3x4xi64>) { + outs(%out: tensor<3x4xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -140,25 +140,19 @@ [ 1, 1, 3, 4 ] ]> : tensor<3x4xi64> %dm = sparse_tensor.convert %m2 : tensor<3x4xi64> to tensor<3x4xi64, #SparseMatrix> + // Setup out tensors. + %init_8 = bufferization.alloc_tensor() : tensor<8xi64> + %init_3_4 = bufferization.alloc_tensor() : tensor<3x4xi64> + // Call the kernels. - %0 = call @sparse_index_1d_conj(%sv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64> - %1 = call @sparse_index_1d_disj(%sv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64> - %2 = call @sparse_index_1d_conj(%dv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64> - %3 = call @sparse_index_1d_disj(%dv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64> - %4 = call @sparse_index_2d_conj(%sm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> - %5 = call @sparse_index_2d_disj(%sm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> - %6 = call @sparse_index_2d_conj(%dm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> - %7 = call @sparse_index_2d_disj(%dm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> - - // Get the backing buffers. 
- %mem0 = bufferization.to_memref %0 : memref<8xi64> - %mem1 = bufferization.to_memref %1 : memref<8xi64> - %mem2 = bufferization.to_memref %2 : memref<8xi64> - %mem3 = bufferization.to_memref %3 : memref<8xi64> - %mem4 = bufferization.to_memref %4 : memref<3x4xi64> - %mem5 = bufferization.to_memref %5 : memref<3x4xi64> - %mem6 = bufferization.to_memref %6 : memref<3x4xi64> - %mem7 = bufferization.to_memref %7 : memref<3x4xi64> + %0 = call @sparse_index_1d_conj(%sv, %init_8) : (tensor<8xi64, #SparseVector>, tensor<8xi64>) -> tensor<8xi64> + %1 = call @sparse_index_1d_disj(%sv, %init_8) : (tensor<8xi64, #SparseVector>, tensor<8xi64>) -> tensor<8xi64> + %2 = call @sparse_index_1d_conj(%dv, %init_8) : (tensor<8xi64, #SparseVector>, tensor<8xi64>) -> tensor<8xi64> + %3 = call @sparse_index_1d_disj(%dv, %init_8) : (tensor<8xi64, #SparseVector>, tensor<8xi64>) -> tensor<8xi64> + %4 = call @sparse_index_2d_conj(%sm, %init_3_4) : (tensor<3x4xi64, #SparseMatrix>, tensor<3x4xi64>) -> tensor<3x4xi64> + %5 = call @sparse_index_2d_disj(%sm, %init_3_4) : (tensor<3x4xi64, #SparseMatrix>, tensor<3x4xi64>) -> tensor<3x4xi64> + %6 = call @sparse_index_2d_conj(%dm, %init_3_4) : (tensor<3x4xi64, #SparseMatrix>, tensor<3x4xi64>) -> tensor<3x4xi64> + %7 = call @sparse_index_2d_disj(%dm, %init_3_4) : (tensor<3x4xi64, #SparseMatrix>, tensor<3x4xi64>) -> tensor<3x4xi64> // // Verify result. 
@@ -172,14 +166,14 @@ // CHECK-NEXT: ( ( 0, 0, 0, 0 ), ( 0, 2, 2, 3 ), ( 0, 2, 12, 24 ) ) // CHECK-NEXT: ( ( 1, 2, 3, 4 ), ( 2, 4, 4, 5 ), ( 3, 4, 7, 9 ) ) // - %vv0 = vector.transfer_read %mem0[%c0], %du: memref<8xi64>, vector<8xi64> - %vv1 = vector.transfer_read %mem1[%c0], %du: memref<8xi64>, vector<8xi64> - %vv2 = vector.transfer_read %mem2[%c0], %du: memref<8xi64>, vector<8xi64> - %vv3 = vector.transfer_read %mem3[%c0], %du: memref<8xi64>, vector<8xi64> - %vv4 = vector.transfer_read %mem4[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64> - %vv5 = vector.transfer_read %mem5[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64> - %vv6 = vector.transfer_read %mem6[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64> - %vv7 = vector.transfer_read %mem7[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64> + %vv0 = vector.transfer_read %0[%c0], %du: tensor<8xi64>, vector<8xi64> + %vv1 = vector.transfer_read %1[%c0], %du: tensor<8xi64>, vector<8xi64> + %vv2 = vector.transfer_read %2[%c0], %du: tensor<8xi64>, vector<8xi64> + %vv3 = vector.transfer_read %3[%c0], %du: tensor<8xi64>, vector<8xi64> + %vv4 = vector.transfer_read %4[%c0,%c0], %du: tensor<3x4xi64>, vector<3x4xi64> + %vv5 = vector.transfer_read %5[%c0,%c0], %du: tensor<3x4xi64>, vector<3x4xi64> + %vv6 = vector.transfer_read %6[%c0,%c0], %du: tensor<3x4xi64>, vector<3x4xi64> + %vv7 = vector.transfer_read %7[%c0,%c0], %du: tensor<3x4xi64>, vector<3x4xi64> vector.print %vv0 : vector<8xi64> vector.print %vv1 : vector<8xi64> vector.print %vv2 : vector<8xi64> @@ -194,14 +188,6 @@ bufferization.dealloc_tensor %dv : tensor<8xi64, #SparseVector> bufferization.dealloc_tensor %sm : tensor<3x4xi64, #SparseMatrix> bufferization.dealloc_tensor %dm : tensor<3x4xi64, #SparseMatrix> - memref.dealloc %mem0 : memref<8xi64> - memref.dealloc %mem1 : memref<8xi64> - memref.dealloc %mem2 : memref<8xi64> - memref.dealloc %mem3 : memref<8xi64> - memref.dealloc %mem4 : memref<3x4xi64> - memref.dealloc %mem5 : memref<3x4xi64> - memref.dealloc %mem6 : 
memref<3x4xi64> - memref.dealloc %mem7 : memref<3x4xi64> return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -75,18 +75,18 @@ %a = sparse_tensor.new %fileName : !Filename to tensor // Initialize dense vectors. - %bdata = memref.alloc(%c256) : memref - %xdata = memref.alloc(%c4) : memref - scf.for %i = %c0 to %c256 step %c1 { + %init_256 = bufferization.alloc_tensor(%c256) : tensor + %b = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %init_256) -> tensor { %k = arith.addi %i, %c1 : index %j = arith.index_cast %k : index to i32 - memref.store %j, %bdata[%i] : memref + %t2 = tensor.insert %j into %t[%i] : tensor + scf.yield %t2 : tensor } - scf.for %i = %c0 to %c4 step %c1 { - memref.store %i0, %xdata[%i] : memref + %init_4 = bufferization.alloc_tensor(%c4) : tensor + %x = scf.for %i = %c0 to %c4 step %c1 iter_args(%t = %init_4) -> tensor { + %t2 = tensor.insert %i0 into %t[%i] : tensor + scf.yield %t2 : tensor } - %b = bufferization.to_tensor %bdata : memref - %x = bufferization.to_tensor %xdata : memref // Call kernel. %0 = call @kernel_matvec(%a, %b, %x) @@ -96,13 +96,10 @@ // // CHECK: ( 889, 1514, -21, -3431 ) // - %m = bufferization.to_memref %0 : memref - %v = vector.transfer_read %m[%c0], %i0: memref, vector<4xi32> + %v = vector.transfer_read %0[%c0], %i0: tensor, vector<4xi32> vector.print %v : vector<4xi32> // Release the resources. 
- memref.dealloc %bdata : memref - memref.dealloc %xdata : memref bufferization.dealloc_tensor %a : tensor return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -66,55 +66,58 @@ // func.func @entry() { %f0 = arith.constant 0.0 : f64 - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index + %cst0 = arith.constant 0 : index + %cst1 = arith.constant 1 : index + %cst2 = arith.constant 2 : index // Read the sparse input tensor B from a file. - %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename) + %fileName = call @getTensorFilename(%cst0) : (index) -> (!Filename) %b = sparse_tensor.new %fileName : !Filename to tensor // Get sizes from B, pick a fixed size for dim-2 of A. - %isz = tensor.dim %b, %c0 : tensor + %isz = tensor.dim %b, %cst0 : tensor %jsz = arith.constant 5 : index - %ksz = tensor.dim %b, %c1 : tensor - %lsz = tensor.dim %b, %c2 : tensor + %ksz = tensor.dim %b, %cst1 : tensor + %lsz = tensor.dim %b, %cst2 : tensor // Initialize dense input matrix C. - %cdata = memref.alloc(%ksz, %jsz) : memref - scf.for %k = %c0 to %ksz step %c1 { - scf.for %j = %c0 to %jsz step %c1 { + %c0 = bufferization.alloc_tensor(%ksz, %jsz) : tensor + %c = scf.for %k = %cst0 to %ksz step %cst1 iter_args(%c1 = %c0) -> tensor { + %c2 = scf.for %j = %cst0 to %jsz step %cst1 iter_args(%c3 = %c1) -> tensor { %k0 = arith.muli %k, %jsz : index %k1 = arith.addi %k0, %j : index %k2 = arith.index_cast %k1 : index to i32 %kf = arith.sitofp %k2 : i32 to f64 - memref.store %kf, %cdata[%k, %j] : memref + %c4 = tensor.insert %kf into %c3[%k, %j] : tensor + scf.yield %c4 : tensor } + scf.yield %c2 : tensor } - %c = bufferization.to_tensor %cdata : memref // Initialize dense input matrix D. 
- %ddata = memref.alloc(%lsz, %jsz) : memref - scf.for %l = %c0 to %lsz step %c1 { - scf.for %j = %c0 to %jsz step %c1 { + %d0 = bufferization.alloc_tensor(%lsz, %jsz) : tensor + %d = scf.for %l = %cst0 to %lsz step %cst1 iter_args(%d1 = %d0) -> tensor { + %d2 = scf.for %j = %cst0 to %jsz step %cst1 iter_args(%d3 = %d1) -> tensor { %k0 = arith.muli %l, %jsz : index %k1 = arith.addi %k0, %j : index %k2 = arith.index_cast %k1 : index to i32 %kf = arith.sitofp %k2 : i32 to f64 - memref.store %kf, %ddata[%l, %j] : memref + %d4 = tensor.insert %kf into %d3[%l, %j] : tensor + scf.yield %d4 : tensor } + scf.yield %d2 : tensor } - %d = bufferization.to_tensor %ddata : memref // Initialize dense output matrix A. - %adata = memref.alloc(%isz, %jsz) : memref - scf.for %i = %c0 to %isz step %c1 { - scf.for %j = %c0 to %jsz step %c1 { - memref.store %f0, %adata[%i, %j] : memref + %a0 = bufferization.alloc_tensor(%isz, %jsz) : tensor + %a = scf.for %i = %cst0 to %isz step %cst1 iter_args(%a1 = %a0) -> tensor { + %a2 = scf.for %j = %cst0 to %jsz step %cst1 iter_args(%a3 = %a1) -> tensor { + %a4 = tensor.insert %f0 into %a3[%i, %j] : tensor + scf.yield %a4 : tensor } + scf.yield %a2 : tensor } - %a = bufferization.to_tensor %adata : memref // Call kernel. %0 = call @kernel_mttkrp(%b, %c, %d, %a) @@ -126,15 +129,11 @@ // CHECK: ( ( 16075, 21930, 28505, 35800, 43815 ), // CHECK: ( 10000, 14225, 19180, 24865, 31280 ) ) // - %m = bufferization.to_memref %0 : memref - %v = vector.transfer_read %m[%c0, %c0], %f0 - : memref, vector<2x5xf64> + %v = vector.transfer_read %0[%cst0, %cst0], %f0 + : tensor, vector<2x5xf64> vector.print %v : vector<2x5xf64> // Release the resources. 
- memref.dealloc %adata : memref - memref.dealloc %cdata : memref - memref.dealloc %ddata : memref bufferization.dealloc_tensor %b : tensor return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir @@ -129,12 +129,8 @@ bufferization.dealloc_tensor %collapse3 : tensor<12xf64, #SparseVector> // Release dense resources. - // TODO(springerm): Replace these with a bufferization.release op (operating - // on tensors). - %meme1 = bufferization.to_memref %expand1 : memref<3x4xf64> - memref.dealloc %meme1 : memref<3x4xf64> - %memc1 = bufferization.to_memref %collapse1 : memref<12xf64> - memref.dealloc %memc1 : memref<12xf64> + bufferization.dealloc_tensor %expand1 : tensor<3x4xf64> + bufferization.dealloc_tensor %collapse1 : tensor<12xf64> return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir @@ -73,24 +73,26 @@ %c10 = arith.constant 10 : index // Setup memory for the dense matrices and initialize. 
- %adata = memref.alloc(%c5, %c10) : memref - %bdata = memref.alloc(%c10, %c5) : memref - %xdata = memref.alloc(%c5, %c5) : memref - scf.for %i = %c0 to %c5 step %c1 { - scf.for %j = %c0 to %c5 step %c1 { - memref.store %d0, %xdata[%i, %j] : memref + %a0 = bufferization.alloc_tensor(%c5, %c10) : tensor + %b0 = bufferization.alloc_tensor(%c10, %c5) : tensor + %x0 = bufferization.alloc_tensor(%c5, %c5) : tensor + %a, %b, %x = scf.for %i = %c0 to %c5 step %c1 iter_args(%a1 = %a0, %b1 = %b0, %x1 = %x0) + -> (tensor, tensor, tensor) { + %x2 = scf.for %j = %c0 to %c5 step %c1 iter_args(%x3 = %x1) -> (tensor) { + %x4 = tensor.insert %d0 into %x3[%i, %j] : tensor + scf.yield %x4 : tensor } %p = arith.addi %i, %c1 : index %q = arith.index_cast %p : index to i32 %d = arith.sitofp %q : i32 to f32 - scf.for %j = %c0 to %c10 step %c1 { - memref.store %d, %adata[%i, %j] : memref - memref.store %d, %bdata[%j, %i] : memref + %a2, %b2 = scf.for %j = %c0 to %c10 step %c1 iter_args(%a3 = %a1, %b3 = %b1) + -> (tensor, tensor) { + %a4 = tensor.insert %d into %a3[%i, %j] : tensor + %b4 = tensor.insert %d into %b3[%j, %i] : tensor + scf.yield %a4, %b4 : tensor, tensor } + scf.yield %a2, %b2, %x2 : tensor, tensor, tensor } - %a = bufferization.to_tensor %adata : memref - %b = bufferization.to_tensor %bdata : memref - %x = bufferization.to_tensor %xdata : memref // Read the sparse matrix from file, construct sparse storage. %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename) @@ -109,16 +111,12 @@ // CHECK: ( 164, 0, 0, 640, 0 ) // CHECK: ( 0, 520, 0, 0, 1250 ) // - %r = bufferization.to_memref %0 : memref scf.for %i = %c0 to %c5 step %c1 { - %v = vector.transfer_read %r[%i, %c0], %d0: memref, vector<5xf32> + %v = vector.transfer_read %0[%i, %c0], %d0: tensor, vector<5xf32> vector.print %v : vector<5xf32> } // Release the resources. 
- memref.dealloc %adata : memref - memref.dealloc %bdata : memref - memref.dealloc %xdata : memref bufferization.dealloc_tensor %s : tensor return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -190,14 +190,12 @@ // // CHECK-NEXT: ( 96, 192, 0, 0 ) // - %m0 = bufferization.to_memref %0 : memref<8x8xf64> - %m1 = bufferization.to_memref %1 : memref<8x8xf64> %m2 = sparse_tensor.values %2 : tensor<8x8xf64, #SM> to memref %m3 = sparse_tensor.values %3 : tensor<8x8xf64, #SM> to memref - %v0 = vector.transfer_read %m0[%c0, %c0], %d0 - : memref<8x8xf64>, vector<8x8xf64> - %v1 = vector.transfer_read %m1[%c0, %c0], %d0 - : memref<8x8xf64>, vector<8x8xf64> + %v0 = vector.transfer_read %0[%c0, %c0], %d0 + : tensor<8x8xf64>, vector<8x8xf64> + %v1 = vector.transfer_read %1[%c0, %c0], %d0 + : tensor<8x8xf64>, vector<8x8xf64> %v2 = vector.transfer_read %m2[%c0], %d0 : memref, vector<4xf64> %v3 = vector.transfer_read %m3[%c0], %d0 : memref, vector<4xf64> vector.print %v0 : vector<8x8xf64> @@ -207,8 +205,8 @@ // Release the resources. 
bufferization.dealloc_tensor %s : tensor<8x8xf64, #SM> - memref.dealloc %m0 : memref<8x8xf64> - memref.dealloc %m1 : memref<8x8xf64> + bufferization.dealloc_tensor %0 : tensor<8x8xf64> + bufferization.dealloc_tensor %1 : tensor<8x8xf64> bufferization.dealloc_tensor %2 : tensor<8x8xf64, #SM> bufferization.dealloc_tensor %3 : tensor<8x8xf64, #SM> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir @@ -71,25 +71,27 @@ %a = sparse_tensor.new %fileName : !Filename to tensor // Initialize dense vectors. - %bdata = memref.alloc(%c256, %c4) : memref - %xdata = memref.alloc(%c4, %c4) : memref - scf.for %i = %c0 to %c256 step %c1 { - scf.for %j = %c0 to %c4 step %c1 { + %init_256_4 = bufferization.alloc_tensor(%c256, %c4) : tensor + %b = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %init_256_4) -> tensor { + %b2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> tensor { %k0 = arith.muli %i, %c4 : index %k1 = arith.addi %j, %k0 : index %k2 = arith.index_cast %k1 : index to i32 %k = arith.sitofp %k2 : i32 to f64 - memref.store %k, %bdata[%i, %j] : memref + %t3 = tensor.insert %k into %t2[%i, %j] : tensor + scf.yield %t3 : tensor } + scf.yield %b2 : tensor } - scf.for %i = %c0 to %c4 step %c1 { - scf.for %j = %c0 to %c4 step %c1 { - memref.store %i0, %xdata[%i, %j] : memref + %init_4_4 = bufferization.alloc_tensor(%c4, %c4) : tensor + %x = scf.for %i = %c0 to %c4 step %c1 iter_args(%t = %init_4_4) -> tensor { + %x2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> tensor { + %t3 = tensor.insert %i0 into %t2[%i, %j] : tensor + scf.yield %t3 : tensor } + scf.yield %x2 : tensor } - %b = bufferization.to_tensor %bdata : memref - %x = bufferization.to_tensor %xdata : memref - + // Call kernel. 
%0 = call @kernel_spmm(%a, %b, %x) : (tensor, tensor, tensor) -> tensor @@ -98,13 +100,10 @@ // // CHECK: ( ( 3548, 3550, 3552, 3554 ), ( 6052, 6053, 6054, 6055 ), ( -56, -63, -70, -77 ), ( -13704, -13709, -13714, -13719 ) ) // - %m = bufferization.to_memref %0 : memref - %v = vector.transfer_read %m[%c0, %c0], %i0: memref, vector<4x4xf64> + %v = vector.transfer_read %0[%c0, %c0], %i0: tensor, vector<4x4xf64> vector.print %v : vector<4x4xf64> // Release the resources. - memref.dealloc %bdata : memref - memref.dealloc %xdata : memref bufferization.dealloc_tensor %a : tensor return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir @@ -52,9 +52,7 @@ %d0 = arith.constant 0.0 : bf16 // Setup memory for a single reduction scalar, // initialized to zero. - %xdata = memref.alloc() : memref - memref.store %d0, %xdata[] : memref - %x = bufferization.to_tensor %xdata : memref + %x = tensor.from_elements %d0 : tensor // Call the kernel. %0 = call @kernel_sum_reduce(%a, %x) @@ -64,13 +62,11 @@ // // CHECK: 13.5 // - %m = bufferization.to_memref %0 : memref - %v = memref.load %m[] : memref + %v = tensor.extract %0[] : tensor %vf = arith.extf %v: bf16 to f32 vector.print %vf : f32 // Release the resources. - memref.dealloc %xdata : memref bufferization.dealloc_tensor %a : tensor return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir @@ -52,9 +52,7 @@ %d0 = arith.constant 0.0 : f16 // Setup memory for a single reduction scalar, // initialized to zero. 
- %xdata = memref.alloc() : memref - memref.store %d0, %xdata[] : memref - %x = bufferization.to_tensor %xdata : memref + %x = tensor.from_elements %d0 : tensor // Call the kernel. %0 = call @kernel_sum_reduce(%a, %x) @@ -64,13 +62,11 @@ // // CHECK: 13.5 // - %m = bufferization.to_memref %0 : memref - %v = memref.load %m[] : memref + %v = tensor.extract %0[] : tensor %vf = arith.extf %v: f16 to f32 vector.print %vf : f32 // Release the resources. - memref.dealloc %xdata : memref bufferization.dealloc_tensor %a : tensor return diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir @@ -219,8 +219,7 @@ %m4 = sparse_tensor.values %4 : tensor to memref %v4 = vector.load %m4[%c0]: memref, vector<32xf64> vector.print %v4 : vector<32xf64> - %m5 = bufferization.to_memref %5 : memref - %v5 = memref.load %m5[] : memref + %v5 = tensor.extract %5[] : tensor vector.print %v5 : f64 // Release the resources.