diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
@@ -16,7 +16,62 @@
   doc = "X(i,j) = A(i,j) OP B(i,j)"
 }
 
+//
+// Contains test cases for the sparse_tensor.binary operation (different cases where
+// left/right/overlap is empty/identity, etc.).
+//
+
 module {
+  // Element-wise sum of two sparse matrices via sparse_tensor.binary; `identity` forwards one-sided values.
+  func.func @add_tensor_1(%A: tensor<4x4xf64, #SparseMatrix>,
+                          %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #SparseMatrix>
+    %sum = linalg.generic #trait_op
+      ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
+                  tensor<4x4xf64, #SparseMatrix>)
+      outs(%init: tensor<4x4xf64, #SparseMatrix>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={
+              ^bb0(%l: f64, %r: f64):  // both operands present
+                %both = arith.addf %l, %r : f64
+                sparse_tensor.yield %both : f64
+            }
+            left=identity   // only the left operand is present: forward it
+            right=identity  // only the right operand is present: forward it
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #SparseMatrix>
+    return %sum : tensor<4x4xf64, #SparseMatrix>
+  }
+
+  // Same computation as @add_tensor_1, but the left/right regions spell out the pass-through with sparse_tensor.yield.
+  func.func @add_tensor_2(%A: tensor<4x4xf64, #SparseMatrix>,
+                          %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #SparseMatrix>
+    %sum = linalg.generic #trait_op
+      ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
+                  tensor<4x4xf64, #SparseMatrix>)
+      outs(%init: tensor<4x4xf64, #SparseMatrix>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={
+              ^bb0(%l: f64, %r: f64):  // both operands present
+                %both = arith.addf %l, %r : f64
+                sparse_tensor.yield %both : f64
+            }
+            left={
+              ^bb0(%l: f64):  // only the left operand is present
+                sparse_tensor.yield %l : f64
+            }
+            right={
+              ^bb0(%r: f64):  // only the right operand is present
+                sparse_tensor.yield %r : f64
+            }
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #SparseMatrix>
+    return %sum : tensor<4x4xf64, #SparseMatrix>
+  }
+  
   // Performs triangular add/sub operation (using semi-ring binary op).
   func.func @triangular(%A: tensor<4x4xf64, #SparseMatrix>,
                         %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
@@ -50,7 +105,141 @@
     return %0 : tensor<4x4xf64, #SparseMatrix>
   }
 
-  // Driver method to call and verify triangular kernel.
+  // Subtracts %B from %A through a semi-ring binary op, clamping each result to constant bounds.
+  func.func @sub_with_thres(%A: tensor<4x4xf64, #SparseMatrix>,
+                            %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #SparseMatrix>
+    // Bounds defined outside the binary op; the left/right regions use them.
+    %outer_hi = arith.constant 2.0 : f64
+    %outer_lo = arith.constant -2.0 : f64
+
+    %clamped = linalg.generic #trait_op
+      ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
+                  tensor<4x4xf64, #SparseMatrix>)
+      outs(%init: tensor<4x4xf64, #SparseMatrix>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={
+              ^bb0(%l: f64, %r: f64):
+                // Bounds defined inside the region: the difference is clamped to [-1, 1].
+                %inner_hi = arith.constant 1.0 : f64
+                %inner_lo = arith.constant -1.0 : f64
+                %diff = arith.subf %l, %r : f64
+                %above = arith.cmpf "oge", %diff, %inner_hi : f64
+                %capped = arith.select %above, %inner_hi, %diff : f64
+                %below = arith.cmpf "ole", %capped, %inner_lo : f64
+                %bounded = arith.select %below, %inner_lo, %capped : f64
+                sparse_tensor.yield %bounded : f64
+            }
+            left={
+              ^bb0(%l: f64):
+                // Only the left value is present: clamp it to the outer bounds [-2, 2].
+                %above = arith.cmpf "oge", %l, %outer_hi : f64
+                %capped = arith.select %above, %outer_hi, %l : f64
+                %below = arith.cmpf "ole", %capped, %outer_lo : f64
+                %bounded = arith.select %below, %outer_lo, %capped : f64
+                sparse_tensor.yield %bounded : f64
+            }
+            right={
+              ^bb0(%r: f64):
+                %neg = arith.negf %r : f64  // only the right value is present: negate, then clamp to the outer bounds
+                %above = arith.cmpf "oge", %neg, %outer_hi : f64
+                %capped = arith.select %above, %outer_hi, %neg : f64
+                %below = arith.cmpf "ole", %capped, %outer_lo : f64
+                %bounded = arith.select %below, %outer_lo, %capped : f64
+                sparse_tensor.yield %bounded : f64
+            }
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #SparseMatrix>
+    return %clamped : tensor<4x4xf64, #SparseMatrix>
+  }
+
+  // Tests values for equality only where both inputs have an entry; the i1 result is zero-extended to i8.
+  func.func @intersect_equal(%A: tensor<4x4xf64, #SparseMatrix>,
+                             %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xi8, #SparseMatrix> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xi8, #SparseMatrix>
+    %flags = linalg.generic #trait_op
+      ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
+                  tensor<4x4xf64, #SparseMatrix>)
+      outs(%init: tensor<4x4xi8, #SparseMatrix>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: i8) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to i8
+            overlap={
+              ^bb0(%l: f64, %r: f64):
+                %same = arith.cmpf "oeq", %l, %r : f64
+                %flag = arith.extui %same : i1 to i8
+                sparse_tensor.yield %flag : i8
+            }
+            left={}   // empty region: nothing is produced for left-only entries
+            right={}  // empty region: nothing is produced for right-only entries
+          linalg.yield %elem : i8
+      } -> tensor<4x4xi8, #SparseMatrix>
+    return %flags : tensor<4x4xi8, #SparseMatrix>
+  }
+
+  // Keeps values present only on the left, negates values present only on the right, and drops overlapping ones.
+  func.func @only_left_right(%A: tensor<4x4xf64, #SparseMatrix>,
+                             %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #SparseMatrix>
+    %disjoint = linalg.generic #trait_op
+      ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
+                  tensor<4x4xf64, #SparseMatrix>)
+      outs(%init: tensor<4x4xf64, #SparseMatrix>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={}     // empty region: nothing is produced where both are present
+            left=identity  // left-only values are forwarded unchanged
+            right={
+              ^bb0(%r: f64):
+                %neg = arith.negf %r : f64
+                sparse_tensor.yield %neg : f64
+            }
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #SparseMatrix>
+    return %disjoint : tensor<4x4xf64, #SparseMatrix>
+  }
+  
+  //
+  // Utility functions to print the value of a tensor.
+  //
+  
+  func.func @print_result(%A: tensor<4x4xf64, #SparseMatrix>) {
+    %pad = arith.constant -1.0 : f64
+    %zero = arith.constant 0 : index
+    // Dense view: convert to a dense tensor, take its buffer, and print the full 4x4 matrix.
+    %dense = sparse_tensor.convert %A : tensor<4x4xf64, #SparseMatrix> to tensor<4x4xf64>
+    %buf = bufferization.to_memref %dense : memref<4x4xf64>
+    %matrix = vector.transfer_read %buf[%zero, %zero], %pad: memref<4x4xf64>, vector<4x4xf64>
+    vector.print %matrix : vector<4x4xf64>
+    // Stored values: read 16 lanes from the values buffer, with %pad as the transfer_read padding value.
+    %stored = sparse_tensor.values %A : tensor<4x4xf64, #SparseMatrix> to memref<?xf64>
+    %lanes = vector.transfer_read %stored[%zero], %pad: memref<?xf64>, vector<16xf64>
+    vector.print %lanes : vector<16xf64>
+
+    // Deallocate the buffer obtained from the dense conversion.
+    memref.dealloc %buf : memref<4x4xf64>
+    return
+  }
+
+  func.func @print_result_i8(%A: tensor<4x4xi8, #SparseMatrix>) {
+    %pad = arith.constant -1 : i8
+    %zero = arith.constant 0 : index
+    // Dense view: convert to a dense tensor, take its buffer, and print the full 4x4 matrix.
+    %dense = sparse_tensor.convert %A : tensor<4x4xi8, #SparseMatrix> to tensor<4x4xi8>
+    %buf = bufferization.to_memref %dense : memref<4x4xi8>
+    %matrix = vector.transfer_read %buf[%zero, %zero], %pad: memref<4x4xi8>, vector<4x4xi8>
+    vector.print %matrix : vector<4x4xi8>
+    // Stored values: read 16 lanes from the values buffer, with %pad as the transfer_read padding value.
+    %stored = sparse_tensor.values %A : tensor<4x4xi8, #SparseMatrix> to memref<?xi8>
+    %lanes = vector.transfer_read %stored[%zero], %pad: memref<?xi8>, vector<16xi8>
+    vector.print %lanes : vector<16xi8>
+
+    // Deallocate the buffer obtained from the dense conversion.
+    memref.dealloc %buf : memref<4x4xi8>
+    return
+  }
+  
+  // Driver method to call and verify kernels.
   func.func @entry() {
     %c0 = arith.constant 0 : index
     %du = arith.constant -1.0 : f64
@@ -68,28 +257,67 @@
 
     %a = sparse_tensor.convert %am : tensor<4x4xf64> to tensor<4x4xf64, #SparseMatrix>
     %b = sparse_tensor.convert %bm : tensor<4x4xf64> to tensor<4x4xf64, #SparseMatrix>
-    %0 = call @triangular(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
+
+    %0 = call @add_tensor_1(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
+                                       tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
+    // Verify the results:
+    //
+    // CHECK:     ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
+    // CHECK-NEXT:  ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, -1, -1, -1, -1, -1, -1 )
+    //
+    call @print_result(%0) : (tensor<4x4xf64, #SparseMatrix>) -> ()
+
+    %1 = call @add_tensor_2(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
+                                       tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
+    //
+    // CHECK-NEXT:( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
+    // CHECK-NEXT:  ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, -1, -1, -1, -1, -1, -1 )
+    //
+    call @print_result(%1) : (tensor<4x4xf64, #SparseMatrix>) -> ()
+    
+    %2 = call @triangular(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
                                      tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
+    //
+    // CHECK-NEXT:( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
+    // CHECK-NEXT:  ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, -1, -1, -1, -1, -1, -1 )
+    //
+    call @print_result(%2) : (tensor<4x4xf64, #SparseMatrix>) -> ()
 
+    %3 = call @sub_with_thres(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
+                                         tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
     //
-    // Verify the results.
+    // CHECK-NEXT:( ( 0, 0, 1, -1 ), ( 0, 1, 0, 0 ), ( -1, -2, -2, 2 ), ( 1, 2, 0, 0 ) )
+    // CHECK-NEXT:  ( 0, 1, -1, 1, -1, -2, -2, 2, 1, 2, -1, -1, -1, -1, -1, -1 )
     //
-    // CHECK:    ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
-    // CHECK-NEXT: ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, -1, -1, -1, -1, -1, -1 )
+    call @print_result(%3) : (tensor<4x4xf64, #SparseMatrix>) -> ()
+
+    // There are only four pairs of intersecting elements between %a and %b,
+    // and only one of those pairs holds equal values.
+    %4 = call @intersect_equal(%a, %b): (tensor<4x4xf64, #SparseMatrix>,
+                                         tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xi8, #SparseMatrix>
     //
-    %c = sparse_tensor.convert %0 : tensor<4x4xf64, #SparseMatrix> to tensor<4x4xf64>
-    %m = bufferization.to_memref %c : memref<4x4xf64>
-    %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x4xf64>, vector<4x4xf64>
-    vector.print %v : vector<4x4xf64>
-    %1 = sparse_tensor.values %0 : tensor<4x4xf64, #SparseMatrix> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<16xf64>
-    vector.print %2 : vector<16xf64>
+    // CHECK-NEXT:( ( 1, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) )
+    // CHECK-NEXT:  ( 1, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    //
+    call @print_result_i8(%4) : (tensor<4x4xi8, #SparseMatrix>) -> ()
 
+    %5 = call @only_left_right(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
+                                          tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
+    //                                          
+    // CHECK-NEXT:( ( 0, 0, 0, -1 ), ( 0, 0, 0, 0 ), ( -1, -5, -2, 4 ), ( 0, 4, 0, 0 ) )
+    // CHECK-NEXT:  ( -1, -1, -5, -2, 4, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    //
+    call @print_result(%5) : (tensor<4x4xf64, #SparseMatrix>) -> ()
+    
     // Release the resources.
-    memref.dealloc %m : memref<4x4xf64>
     sparse_tensor.release %a : tensor<4x4xf64, #SparseMatrix>
     sparse_tensor.release %b : tensor<4x4xf64, #SparseMatrix>
     sparse_tensor.release %0 : tensor<4x4xf64, #SparseMatrix>
+    sparse_tensor.release %1 : tensor<4x4xf64, #SparseMatrix>
+    sparse_tensor.release %2 : tensor<4x4xf64, #SparseMatrix>
+    sparse_tensor.release %3 : tensor<4x4xf64, #SparseMatrix>
+    sparse_tensor.release %4 : tensor<4x4xi8, #SparseMatrix>
+    sparse_tensor.release %5 : tensor<4x4xf64, #SparseMatrix>
     return
   }
 }