diff --git a/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp b/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp
@@ -132,6 +132,12 @@
       name = "sparseValuesF64";
     else if (eltType.isF32())
       name = "sparseValuesF32";
+    else if (eltType.isInteger(32))
+      name = "sparseValuesI32";
+    else if (eltType.isInteger(16))
+      name = "sparseValuesI16";
+    else if (eltType.isInteger(8))
+      name = "sparseValuesI8";
     else
       return failure();
     rewriter.replaceOpWithNewOp<CallOp>(
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
@@ -837,11 +837,19 @@
   assert(codegen.curVecLength == 1);
   codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain
   unsigned lhs = op.getNumShapedOperands() - 1;
-  if (red.getType().isa<VectorType>()) {
+  if (auto vtp = red.getType().dyn_cast<VectorType>()) {
     // TODO: assumes + reductions for now
+    StringAttr kind = rewriter.getStringAttr("add");
     Value ld = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
-    red = rewriter.create<vector::ReductionOp>(
-        op.getLoc(), ld.getType(), rewriter.getStringAttr("add"), red, ld);
+    // Integer reductions don't accept an accumulator.
+    if (vtp.getElementType().isa<IntegerType>()) {
+      red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
+                                                 kind, red, ValueRange{});
+      red = rewriter.create<AddIOp>(op.getLoc(), red, ld);
+    } else {
+      red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
+                                                 kind, red, ld);
+    }
   }
   genTensorStore(merger, codegen, rewriter, op, lhs, red);
 }
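Note on the Sparsification.cpp change above: as the new comment states, the integer form of the vector reduction emitted here takes no accumulator operand, so the scalar accumulator is reduced into the result with a separate integer add. The following scalar model of the two epilogues is an illustrative sketch only (not part of the patch, and not the generated code; names are made up):

// Scalar model of the vectorized reduction epilogue (illustrative only).
#include <cstdint>

// Floating-point path: the running accumulator 'x' is folded into the reduction.
float reduceEpilogueF32(const float *vec, int n, float x) {
  float r = x;
  for (int i = 0; i < n; ++i)
    r += vec[i];
  return r;
}

// Integer path: reduce without an accumulator, then add 'x' back explicitly.
int32_t reduceEpilogueI32(const int32_t *vec, int n, int32_t x) {
  int32_t r = 0;
  for (int i = 0; i < n; ++i)
    r += vec[i];
  return r + x;
}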
diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -127,6 +127,9 @@
   // Primary storage.
   virtual void getValues(std::vector<double> **) { fatal("valf64"); }
   virtual void getValues(std::vector<float> **) { fatal("valf32"); }
+  virtual void getValues(std::vector<int32_t> **) { fatal("vali32"); }
+  virtual void getValues(std::vector<int16_t> **) { fatal("vali16"); }
+  virtual void getValues(std::vector<int8_t> **) { fatal("vali8"); }
 
   virtual ~SparseTensorStorageBase() {}
@@ -453,64 +456,58 @@
 // implementation of a bufferized SparseTensor in MLIR. This could be replaced
 // by actual codegen in MLIR.
 //
+// Because we cannot use C++ templates with C linkage, some macro magic is used
+// to generate implementations for all required type combinations that can be
+// called from MLIR generated code.
+//
 //===----------------------------------------------------------------------===//
 
-// Cannot use templates with C linkage.
-
-struct MemRef1DU64 {
-  const uint64_t *base;
-  const uint64_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
-
-struct MemRef1DU32 {
-  const uint32_t *base;
-  const uint32_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define TEMPLATE(NAME, TYPE) \
+  struct NAME { \
+    const TYPE *base; \
+    const TYPE *data; \
+    uint64_t off; \
+    uint64_t sizes[1]; \
+    uint64_t strides[1]; \
+  }
 
-struct MemRef1DU16 {
-  const uint16_t *base;
-  const uint16_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define CASE(p, i, v, P, I, V) \
+  if (ptrTp == (p) && indTp == (i) && valTp == (v)) \
+    return newSparseTensor<P, I, V>(filename, sparsity, asize)
 
-struct MemRef1DU8 {
-  const uint8_t *base;
-  const uint8_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define IMPL1(RET, NAME, TYPE, LIB) \
+  RET NAME(void *tensor) { \
+    std::vector<TYPE> *v; \
+    static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v); \
+    return {v->data(), v->data(), 0, {v->size()}, {1}}; \
+  }
 
-struct MemRef1DF64 {
-  const double *base;
-  const double *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define IMPL2(RET, NAME, TYPE, LIB) \
+  RET NAME(void *tensor, uint64_t d) { \
+    std::vector<TYPE> *v; \
+    static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v, d); \
+    return {v->data(), v->data(), 0, {v->size()}, {1}}; \
+  }
 
-struct MemRef1DF32 {
-  const float *base;
-  const float *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+TEMPLATE(MemRef1DU64, uint64_t);
+TEMPLATE(MemRef1DU32, uint32_t);
+TEMPLATE(MemRef1DU16, uint16_t);
+TEMPLATE(MemRef1DU8, uint8_t);
+TEMPLATE(MemRef1DI32, int32_t);
+TEMPLATE(MemRef1DI16, int16_t);
+TEMPLATE(MemRef1DI8, int8_t);
+TEMPLATE(MemRef1DF64, double);
+TEMPLATE(MemRef1DF32, float);
 
 enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
-enum PrimaryTypeEnum : uint64_t { kF64 = 1, kF32 = 2 };
-#define CASE(p, i, v, P, I, V) \
-  if (ptrTp == (p) && indTp == (i) && valTp == (v)) \
-    return newSparseTensor<P, I, V>(filename, sparsity, asize)
+enum PrimaryTypeEnum : uint64_t {
+  kF64 = 1,
+  kF32 = 2,
+  kI32 = 3,
+  kI16 = 4,
+  kI8 = 5
+};
 
 void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
                       uint64_t asize, uint64_t astride, uint64_t ptrTp,
@@ -534,6 +531,17 @@
   CASE(kU16, kU16, kF32, uint16_t, uint16_t, float);
   CASE(kU8, kU8, kF32, uint8_t, uint8_t, float);
 
+  // Integral matrices with low overhead storage.
+  CASE(kU32, kU32, kI32, uint32_t, uint32_t, int32_t);
+  CASE(kU32, kU32, kI16, uint32_t, uint32_t, int16_t);
+  CASE(kU32, kU32, kI8, uint32_t, uint32_t, int8_t);
+  CASE(kU16, kU16, kI32, uint16_t, uint16_t, int32_t);
+  CASE(kU16, kU16, kI16, uint16_t, uint16_t, int16_t);
+  CASE(kU16, kU16, kI8, uint16_t, uint16_t, int8_t);
+  CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t);
+  CASE(kU8, kU8, kI16, uint8_t, uint8_t, int16_t);
+  CASE(kU8, kU8, kI8, uint8_t, uint8_t, int8_t);
+
   // Unsupported case (add above if needed).
fputs("unsupported combination of types\n", stderr); exit(1); @@ -545,70 +553,29 @@ return static_cast(tensor)->getDimSize(d); } -MemRef1DU64 sparsePointers64(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU32 sparsePointers32(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU16 sparsePointers16(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU8 sparsePointers8(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU64 sparseIndices64(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU32 sparseIndices32(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU16 sparseIndices16(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU8 sparseIndices8(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DF64 sparseValuesF64(void *tensor) { - std::vector *v; - static_cast(tensor)->getValues(&v); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DF32 sparseValuesF32(void *tensor) { - std::vector *v; - static_cast(tensor)->getValues(&v); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} +IMPL2(MemRef1DU64, sparsePointers64, uint64_t, getPointers) +IMPL2(MemRef1DU32, sparsePointers32, uint32_t, getPointers) +IMPL2(MemRef1DU16, sparsePointers16, uint16_t, getPointers) +IMPL2(MemRef1DU8, sparsePointers8, uint8_t, getPointers) +IMPL2(MemRef1DU64, sparseIndices64, uint64_t, getIndices) +IMPL2(MemRef1DU32, sparseIndices32, uint32_t, getIndices) +IMPL2(MemRef1DU16, sparseIndices16, uint16_t, getIndices) +IMPL2(MemRef1DU8, sparseIndices8, uint8_t, getIndices) +IMPL1(MemRef1DF64, sparseValuesF64, double, getValues) +IMPL1(MemRef1DF32, sparseValuesF32, float, getValues) +IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues) +IMPL1(MemRef1DI16, sparseValuesI16, int16_t, getValues) +IMPL1(MemRef1DI8, sparseValuesI8, int8_t, getValues) void delSparseTensor(void *tensor) { delete static_cast(tensor); } +#undef TEMPLATE +#undef CASE +#undef IMPL1 +#undef IMPL2 + } // extern "C" #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS diff --git a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir --- a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir @@ -54,18 +54,18 @@ // a sparse matrix A with a dense vector b into a dense vector x. 
diff --git a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir
--- a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir
@@ -54,18 +54,18 @@
 // a sparse matrix A with a dense vector b into a dense vector x.
 //
 func @kernel_matvec(%argA: !SparseTensor,
-                    %argb: tensor<?xf32>,
-                    %argx: tensor<?xf32>) -> tensor<?xf32> {
-  %arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xf32>
+                    %argb: tensor<?xi32>,
+                    %argx: tensor<?xi32>) -> tensor<?xi32> {
+  %arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xi32>
   %0 = linalg.generic #matvec
-    ins(%arga, %argb: tensor<?x?xf32>, tensor<?xf32>)
-    outs(%argx: tensor<?xf32>) {
-    ^bb(%a: f32, %b: f32, %x: f32):
-      %0 = mulf %a, %b : f32
-      %1 = addf %x, %0 : f32
-      linalg.yield %1 : f32
-  } -> tensor<?xf32>
-  return %0 : tensor<?xf32>
+    ins(%arga, %argb: tensor<?x?xi32>, tensor<?xi32>)
+    outs(%argx: tensor<?xi32>) {
+    ^bb(%a: i32, %b: i32, %x: i32):
+      %0 = muli %a, %b : i32
+      %1 = addi %x, %0 : i32
+      linalg.yield %1 : i32
+  } -> tensor<?xi32>
+  return %0 : tensor<?xi32>
 }
 
 //
@@ -79,7 +79,7 @@
 // Main driver that reads matrix from file and calls the sparse kernel.
 //
 func @entry() {
-  %f0 = constant 0.0 : f32
+  %i0 = constant 0 : i32
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c2 = constant 2 : index
@@ -89,51 +89,51 @@
   // Mark inner dimension of the matrix as sparse and encode the
   // storage scheme types (this must match the metadata in the
   // alias above and compiler switches). In this case, we test
-  // that 8-bit indices and pointers work correctly.
+  // that 8-bit indices and pointers work correctly on a matrix
+  // with i32 elements.
   %annotations = memref.alloc(%c2) : memref<?xi1>
   %sparse = constant true
   %dense = constant false
   memref.store %dense, %annotations[%c0] : memref<?xi1>
   memref.store %sparse, %annotations[%c1] : memref<?xi1>
   %u8 = constant 4 : index
-  %f32 = constant 2 : index
+  %i32 = constant 3 : index
 
   // Read the sparse matrix from file, construct sparse storage.
   %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
-  %a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %f32)
+  %a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %i32)
     : (!Filename, memref<?xi1>, index, index, index) -> (!SparseTensor)
 
   // Initialize dense vectors.
-  %bdata = memref.alloc(%c256) : memref<?xf32>
-  %xdata = memref.alloc(%c4) : memref<?xf32>
+  %bdata = memref.alloc(%c256) : memref<?xi32>
+  %xdata = memref.alloc(%c4) : memref<?xi32>
   scf.for %i = %c0 to %c256 step %c1 {
     %k = addi %i, %c1 : index
-    %l = index_cast %k : index to i32
-    %f = sitofp %l : i32 to f32
-    memref.store %f, %bdata[%i] : memref<?xf32>
+    %j = index_cast %k : index to i32
+    memref.store %j, %bdata[%i] : memref<?xi32>
   }
   scf.for %i = %c0 to %c4 step %c1 {
-    memref.store %f0, %xdata[%i] : memref<?xf32>
+    memref.store %i0, %xdata[%i] : memref<?xi32>
  }
-  %b = memref.tensor_load %bdata : memref<?xf32>
-  %x = memref.tensor_load %xdata : memref<?xf32>
+  %b = memref.tensor_load %bdata : memref<?xi32>
+  %x = memref.tensor_load %xdata : memref<?xi32>
 
   // Call kernel.
   %0 = call @kernel_matvec(%a, %b, %x)
-    : (!SparseTensor, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+    : (!SparseTensor, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
 
   // Print the result for verification.
   //
-  // CHECK: ( 1659, 1534, 21, 18315 )
+  // CHECK: ( 889, 1514, -21, -3431 )
   //
-  %m = memref.buffer_cast %0 : memref<?xf32>
-  %v = vector.transfer_read %m[%c0], %f0: memref<?xf32>, vector<4xf32>
-  vector.print %v : vector<4xf32>
+  %m = memref.buffer_cast %0 : memref<?xi32>
+  %v = vector.transfer_read %m[%c0], %i0: memref<?xi32>, vector<4xi32>
+  vector.print %v : vector<4xi32>
 
   // Release the resources.
   call @delSparseTensor(%a) : (!SparseTensor) -> ()
-  memref.dealloc %bdata : memref<?xf32>
-  memref.dealloc %xdata : memref<?xf32>
+  memref.dealloc %bdata : memref<?xi32>
+  memref.dealloc %xdata : memref<?xi32>
   return
 }
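In the driver above, the index constants %u8 = 4 and %i32 = 3 correspond to OverheadTypeEnum::kU8 and PrimaryTypeEnum::kI32 in SparseUtils.cpp, so the runtime call resolves through one of the new integral CASE entries. Expanded by hand for illustration only (a fragment of the dispatch inside newSparseTensor, not standalone code and not part of the patch):

// Hand expansion of CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t), the branch
// this test exercises: 8-bit pointers and indices over i32 values.
if (ptrTp == (kU8) && indTp == (kU8) && valTp == (kI32))
  return newSparseTensor<uint8_t, uint8_t, int32_t>(filename, sparsity, asize);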
diff --git a/mlir/test/Integration/data/wide.mtx b/mlir/test/Integration/data/wide.mtx
--- a/mlir/test/Integration/data/wide.mtx
+++ b/mlir/test/Integration/data/wide.mtx
@@ -4,20 +4,20 @@
 % see https://math.nist.gov/MatrixMarket
 %
 4 256 17
-1 1 1.0
-1 127 2.0
-1 128 3.0
-1 255 4.0
-2 2 5.0
-2 254 6.0
-3 3 7.0
-4 1 8.0
-4 2 9.0
-4 4 10.0
-4 99 11.0
-4 127 12.0
-4 128 13.0
-4 129 14.0
-4 250 15.0
-4 254 16.0
-4 256 17.0
+1 1 -1
+1 127 2
+1 128 -3
+1 255 4
+2 2 -5
+2 254 6
+3 3 -7
+4 1 8
+4 2 -9
+4 4 10
+4 99 -11
+4 127 12
+4 128 -13
+4 129 14
+4 250 -15
+4 254 16
+4 256 -17
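For reviewers who want to double-check the updated CHECK line in sparse_matvec.mlir, the expected values follow directly from the integer entries above and the test's dense vector, which stores b[j] = j at 1-based column j. A small standalone sketch (not part of the patch) that recomputes the result:

// Recompute the expected matvec output from the wide.mtx entries above,
// using b[col] == col as initialized by the test driver.
#include <cstdint>
#include <cstdio>

int main() {
  // (row, col, value) triples copied from wide.mtx, 1-based indices.
  const int nnz[17][3] = {
      {1, 1, -1},   {1, 127, 2},  {1, 128, -3},  {1, 255, 4},  {2, 2, -5},
      {2, 254, 6},  {3, 3, -7},   {4, 1, 8},     {4, 2, -9},   {4, 4, 10},
      {4, 99, -11}, {4, 127, 12}, {4, 128, -13}, {4, 129, 14}, {4, 250, -15},
      {4, 254, 16}, {4, 256, -17}};
  int32_t x[4] = {0, 0, 0, 0};
  for (const auto &e : nnz)
    x[e[0] - 1] += e[2] * e[1]; // accumulate value * b[col], with b[col] == col
  std::printf("( %d, %d, %d, %d )\n", x[0], x[1], x[2], x[3]);
  // Prints: ( 889, 1514, -21, -3431 ), matching the new CHECK line.
  return 0;
}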