diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -230,6 +230,7 @@ Value v1); private: + bool isZero(unsigned e) const; bool maybeZero(unsigned e) const; bool isInvariant(unsigned e) const; Type inferType(unsigned e, Value src); diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -489,6 +489,11 @@ // ---+---+---+ ---+---+---+ // !x | 0 | y | !x | 0 |-y | // x | x |x+y| x | x |x-y| + // + // TODO: remove this zero "folding" in favor of external pass into linalg + // + if (isZero(tensorExps[e].children.e1)) + return buildLattices(tensorExps[e].children.e0, i); return takeDisj(kind, // take binary disjunction buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); @@ -511,6 +516,18 @@ return buildTensorExp(op, yield->getOperand(0)); } +/// Only returns true if we are certain this is a zero. +bool Merger::isZero(unsigned e) const { + if (tensorExps[e].kind == kInvariant) { + if (auto c = tensorExps[e].val.getDefiningOp<ConstantIntOp>()) + return c.getValue() == 0; + if (auto c = tensorExps[e].val.getDefiningOp<ConstantFloatOp>()) + return c.getValue().isZero(); + } + return false; +} + +/// Only returns false if we are certain this is a nonzero. 
bool Merger::maybeZero(unsigned e) const { if (tensorExps[e].kind == kInvariant) { if (auto c = tensorExps[e].val.getDefiningOp<ConstantIntOp>()) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -0,0 +1,76 @@ +// RUN: mlir-opt %s \ +// RUN: --linalg-generalize-named-ops \ +// RUN: --sparsification --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm \ +// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> + +// An example of a quantized sparse matmul. With the zero offset for the +// sparse input, the sparse compiler generates very efficient code for the +// x(i,j) += (ext(a(i,k)) - 2) * ext(b(k,j)) +// operation. 
+module { + + func @quantized_matmul(%input1: tensor<5x3xi8>, + %input2: tensor<3x6xi8, #DCSR>, + %output: tensor<5x6xi32>) -> tensor<5x6xi32> { + %c0 = constant 0 : i32 + %c2 = constant 2 : i32 + %0 = linalg.quantized_matmul + ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) + outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32> + return %0: tensor<5x6xi32> + } + + func @entry() { + %c0 = constant 0 : index + %i0 = constant 0 : i32 + + %input1 = constant dense<[ + [ -128, 3, 127 ], + [ 0, 0, 0 ], + [ 11, 1, 0 ], + [ 0, 5, -1 ], + [ 13, 0, 3 ] + ]> : tensor<5x3xi8> + + %input2 = constant dense<[ + [ 127, 0, -128, 0, 0, 3 ], + [ 0, 0, 0, 0, 0, 0 ], + [ 0, 0, 0, 100, 10, 0 ] + ]> : tensor<3x6xi8> + + %sparse_input2 = sparse_tensor.convert %input2 : tensor<3x6xi8> to tensor<3x6xi8, #DCSR> + + // Call the kernel. + %output = constant dense<0> : tensor<5x6xi32> + %0 = call @quantized_matmul(%input1, %sparse_input2, %output) + : (tensor<5x3xi8>, + tensor<3x6xi8, #DCSR>, + tensor<5x6xi32>) -> tensor<5x6xi32> + + // + // Verify the output. + // + // CHECK: ( ( -16510, 0, 16640, 12500, 1250, -390 ), + // CHECK-SAME: ( -254, 0, 256, -200, -20, -6 ), + // CHECK-SAME: ( 1143, 0, -1152, -200, -20, 27 ), + // CHECK-SAME: ( -254, 0, 256, -300, -30, -6 ), + // CHECK-SAME: ( 1397, 0, -1408, 100, 10, 33 ) ) + // + %m = memref.buffer_cast %0 : memref<5x6xi32> + %v = vector.transfer_read %m[%c0, %c0], %i0 + : memref<5x6xi32>, vector<5x6xi32> + vector.print %v : vector<5x6xi32> + + return + } +}