diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -28,11 +28,18 @@
 using namespace mlir;
 using namespace mlir::sparse_tensor;
 
+//===----------------------------------------------------------------------===//
+// Declarations of data structures.
+//===----------------------------------------------------------------------===//
+
 namespace {
 
 // Iteration graph sorting.
 enum SortMask { kSparseOnly = 0x0, kIncludeDense = 0x1, kIncludeUndef = 0x2 };
 
+// Reduction kinds.
+enum Reduction { kSum, kProduct, kAnd, kOr, kXor };
+
 // Code generation.
 struct CodeGen {
   CodeGen(SparsificationOptions o, unsigned numTensors, unsigned numLoops)
@@ -68,6 +75,7 @@
   // is most effective; we could generalize to more outer and while-loops.
   unsigned redExp;
   Value redVal;
+  Reduction redKind;
   // Current vector length and mask.
   unsigned curVecLength;
   Value curVecMask;
@@ -75,8 +83,12 @@
 
 } // namespace
 
-// Helper method to apply dimension ordering permutation.
-static unsigned perm(SparseTensorEncodingAttr &enc, unsigned d) {
+//===----------------------------------------------------------------------===//
+// Sparse compiler analysis methods.
+//===----------------------------------------------------------------------===//
+
+/// Helper method to apply dimension ordering permutation.
+static unsigned perm(const SparseTensorEncodingAttr &enc, unsigned d) {
   if (enc) {
     auto order = enc.getDimOrdering();
     if (order) {
@@ -87,8 +99,8 @@
   return d;
 }
 
-// Helper method to translate dim level type to internal representation.
-static Dim toDim(SparseTensorEncodingAttr &enc, unsigned d) {
+/// Helper method to translate dim level type to internal representation.
+static Dim toDim(const SparseTensorEncodingAttr &enc, unsigned d) {
   if (enc) {
     SparseTensorEncodingAttr::DimLevelType tp = enc.getDimLevelType()[d];
     if (tp == SparseTensorEncodingAttr::DimLevelType::Compressed)
@@ -283,6 +295,83 @@
   return false;
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse compiler synthesis methods.
+//===----------------------------------------------------------------------===//
+
+/// Maps reduction kind to name encoding.
+static StringRef getReductionName(Reduction kind) {
+  switch (kind) {
+  case kSum:
+    return "add";
+  case kProduct:
+    return "mul";
+  case kAnd:
+    return "and";
+  case kOr:
+    return "or";
+  case kXor:
+    return "xor";
+  }
+  llvm_unreachable("unknown reduction kind");
+}
+
+/// Maps operation to reduction.
+static Reduction getReduction(Kind kind) {
+  switch (kind) {
+  case Kind::kAddF:
+  case Kind::kAddI:
+  case Kind::kSubF:
+  case Kind::kSubI:
+    return kSum;
+  case Kind::kMulF:
+  case Kind::kMulI:
+    return kProduct;
+  case Kind::kAndI:
+    return kAnd;
+  case Kind::kOrI:
+    return kOr;
+  case Kind::kXorI:
+    return kXor;
+  default:
+    llvm_unreachable("unexpected reduction operator");
+  }
+}
+
+/// Generates an initial value for a vector reduction, following the scheme
+/// given in Chapter 5 of "The Software Vectorization Handbook", where the
+/// initial scalar value is correctly embedded in the vector reduction value,
+/// and a straightforward horizontal reduction will complete the operation.
+static Value genReductionInit(PatternRewriter &rewriter, Location loc,
+                              Reduction kind, VectorType vtp, Value r) {
+  switch (kind) {
+  case kSum:
+  case kXor: {
+    // Initialize reduction vector to: | 0 | .. | 0 | r |
+    Attribute zero = rewriter.getZeroAttr(vtp);
+    Value vec = rewriter.create<ConstantOp>(loc, vtp, zero);
+    return rewriter.create<vector::InsertElementOp>(loc, r, vec, 0);
+  }
+  case kProduct: {
+    // Initialize reduction vector to: | 1 | .. | 1 | r |
+    Type etp = vtp.getElementType();
+    Attribute one;
+    if (etp.isa<FloatType>())
+      one = rewriter.getFloatAttr(etp, 1.0);
+    else
+      one = rewriter.getIntegerAttr(etp, 1);
+    Value vec =
+        rewriter.create<ConstantOp>(loc, vtp, DenseElementsAttr::get(vtp, one));
+    return rewriter.create<vector::InsertElementOp>(loc, r, vec, 0);
+  }
+  case kAnd:
+  case kOr:
+    // Initialize reduction vector to: | r | .. | r | r |
+    return rewriter.create<vector::BroadcastOp>(loc, vtp, r);
+  }
+  llvm_unreachable("unknown reduction kind");
+}
+
 /// Maps sparse integer option to actual integral storage type.
 static Type genIntType(PatternRewriter &rewriter, unsigned width) {
   if (width == 0)
@@ -644,11 +733,15 @@
                                linalg::GenericOp op) {
   if (codegen.redVal)
     return codegen.redVal; // chained with previous for-loop
-  if (codegen.curVecLength > 1) {
-    // TODO: assumes + reductions for now
+  // Generate vector or scalar start of a reduction.
+  unsigned vl = codegen.curVecLength;
+  if (vl > 1) {
     VectorType vtp = vectorType(codegen, codegen.buffers[codegen.redExp]);
-    return rewriter.create<ConstantOp>(op.getLoc(), vtp,
-                                       rewriter.getZeroAttr(vtp));
+    assert(!merger.exp(codegen.redExp).val);
+    codegen.curVecLength = 1;
+    Value load = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
+    codegen.curVecLength = vl;
+    return genReductionInit(rewriter, op.getLoc(), codegen.redKind, vtp, load);
   }
   return genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
 }
@@ -661,19 +754,12 @@
     return;
   assert(codegen.curVecLength == 1);
   codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain
+  // Generate vector or scalar end of a reduction.
   if (auto vtp = red.getType().dyn_cast<VectorType>()) {
-    // TODO: assumes + reductions for now
-    StringAttr kind = rewriter.getStringAttr("add");
-    Value ld = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
-    // Integer reductions don't accept an accumulator.
-    if (vtp.getElementType().isa<IntegerType>()) {
-      red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
-                                                 kind, red, ValueRange{});
-      red = rewriter.create<AddIOp>(op.getLoc(), red, ld);
-    } else {
-      red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
-                                                 kind, red, ld);
-    }
+    StringRef name = getReductionName(codegen.redKind);
+    StringAttr kind = rewriter.getStringAttr(name);
+    red = rewriter.create<vector::ReductionOp>(
+        op.getLoc(), vtp.getElementType(), kind, red, ValueRange{});
   }
   genTensorStore(merger, codegen, rewriter, op, red);
 }
@@ -725,7 +811,8 @@
 /// Hoists loop invariant tensor loads for which indices have been exhausted.
 static void genInvariants(Merger &merger, CodeGen &codegen,
                           PatternRewriter &rewriter, linalg::GenericOp op,
-                          unsigned exp, unsigned ldx, bool hoist) {
+                          unsigned exp, unsigned ldx, bool hoist,
+                          Kind last = Kind::kTensor) {
   if (exp == -1u)
     return;
   if (merger.exp(exp).kind == Kind::kTensor) {
@@ -743,6 +830,7 @@
     OpOperand *lhs = op.getOutputOperand(0);
     if (lhs == t) {
       codegen.redExp = hoist ? exp : -1u;
+      codegen.redKind = getReduction(last);
     } else if (atLevel) {
       merger.exp(exp).val =
           hoist ? genTensorLoad(merger, codegen, rewriter, op, exp) : Value();
@@ -751,10 +839,11 @@
     // Traverse into the binary operations. Note that we only hoist
     // tensor loads, since subsequent MLIR/LLVM passes know how to
     // deal with all other kinds of derived loop invariants.
+    Kind last = merger.exp(exp).kind;
     unsigned e0 = merger.exp(exp).children.e0;
     unsigned e1 = merger.exp(exp).children.e1;
-    genInvariants(merger, codegen, rewriter, op, e0, ldx, hoist);
-    genInvariants(merger, codegen, rewriter, op, e1, ldx, hoist);
+    genInvariants(merger, codegen, rewriter, op, e0, ldx, hoist, last);
+    genInvariants(merger, codegen, rewriter, op, e1, ldx, hoist, last);
   }
 }
 
@@ -1233,6 +1322,10 @@
   rewriter.replaceOp(op, result);
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse compiler rewriting methods.
+//===----------------------------------------------------------------------===//
+
 namespace {
 
 /// Sparse rewriting rule for generic Lingalg operation.
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
@@ -210,32 +210,38 @@
 //
 // CHECK-VEC1-LABEL: func @reduction_d
 // CHECK-VEC1-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-VEC1-DAG: %[[i0:.*]] = constant 0 : i32
 // CHECK-VEC1-DAG: %[[c16:.*]] = constant 16 : index
 // CHECK-VEC1-DAG: %[[c1024:.*]] = constant 1024 : index
 // CHECK-VEC1-DAG: %[[v0:.*]] = constant dense<0.000000e+00> : vector<16xf32>
-// CHECK-VEC1: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[v0]]) -> (vector<16xf32>) {
+// CHECK-VEC1: %[[l:.*]] = memref.load %{{.*}}[] : memref<f32>
+// CHECK-VEC1: %[[r:.*]] = vector.insertelement %[[l]], %[[v0]][%[[i0]] : i32] : vector<16xf32>
+// CHECK-VEC1: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[r]]) -> (vector<16xf32>) {
 // CHECK-VEC1: %[[la:.*]] = vector.load %{{.*}}[%[[i]]] : memref<?xf32>, vector<16xf32>
 // CHECK-VEC1: %[[lb:.*]] = vector.load %{{.*}}[%[[i]]] : memref<1024xf32>, vector<16xf32>
 // CHECK-VEC1: %[[m:.*]] = mulf %[[la]], %[[lb]] : vector<16xf32>
 // CHECK-VEC1: %[[a:.*]] = addf %[[red_in]], %[[m]] : vector<16xf32>
 // CHECK-VEC1: scf.yield %[[a]] : vector<16xf32>
 // CHECK-VEC1: }
-// CHECK-VEC1: %{{.*}} = vector.reduction "add", %[[red]], %{{.*}} : vector<16xf32> into f32
+// CHECK-VEC1: %{{.*}} = vector.reduction "add", %[[red]] : vector<16xf32> into f32
 // CHECK-VEC1: return
 //
 // CHECK-VEC2-LABEL: func @reduction_d
 // CHECK-VEC2-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-VEC2-DAG: %[[i0:.*]] = constant 0 : i32
 // CHECK-VEC2-DAG: %[[c16:.*]] = constant 16 : index
 // CHECK-VEC2-DAG: %[[c1024:.*]] = constant 1024 : index
 // CHECK-VEC2-DAG: %[[v0:.*]] = constant dense<0.000000e+00> : vector<16xf32>
-// CHECK-VEC2: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[v0]]) -> (vector<16xf32>) {
+// CHECK-VEC2: %[[l:.*]] = memref.load %{{.*}}[] : memref<f32>
+// CHECK-VEC2: %[[r:.*]] = vector.insertelement %[[l]], %[[v0]][%[[i0]] : i32] : vector<16xf32>
+// CHECK-VEC2: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[r]]) -> (vector<16xf32>) {
 // CHECK-VEC2: %[[la:.*]] = vector.load %{{.*}}[%[[i]]] : memref<?xf32>, vector<16xf32>
 // CHECK-VEC2: %[[lb:.*]] = vector.load %{{.*}}[%[[i]]] : memref<1024xf32>, vector<16xf32>
 // CHECK-VEC2: %[[m:.*]] = mulf %[[la]], %[[lb]] : vector<16xf32>
 // CHECK-VEC2: %[[a:.*]] = addf %[[red_in]], %[[m]] : vector<16xf32>
 // CHECK-VEC2: scf.yield %[[a]] : vector<16xf32>
 // CHECK-VEC2: }
-// CHECK-VEC2: %{{.*}} = vector.reduction "add", %[[red]], %{{.*}} : vector<16xf32> into f32
+// CHECK-VEC2: %{{.*}} = vector.reduction "add", %[[red]] : vector<16xf32> into f32
 // CHECK-VEC2: return
 //
 func @reduction_d(%arga: tensor<1024xf32, #DenseVector>, %argb: tensor<1024xf32>, %argx: tensor<f32>) -> tensor<f32> {
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
@@ -0,0 +1,216 @@
+// RUN: mlir-opt %s \
+// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
+// RUN:   --sparsification --sparse-tensor-conversion \
+// RUN:   --convert-vector-to-scf --convert-scf-to-std \
+// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
+// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm \
+// RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
+// RUN: mlir-cpu-runner \
+// RUN:  -e entry -entry-point-result=void \
+// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+//
+// Do the same run, but now with SIMDization as well. This should not change the outcome.
+//
+// RUN: mlir-opt %s \
+// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
+// RUN:   --sparsification="vectorization-strategy=2 vl=8" --sparse-tensor-conversion \
+// RUN:   --convert-vector-to-scf --convert-scf-to-std \
+// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
+// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm \
+// RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
+// RUN: mlir-cpu-runner \
+// RUN:  -e entry -entry-point-result=void \
+// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
+#DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>
+
+#trait_reduction = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> ()>    // x (scalar out)
+  ],
+  iterator_types = ["reduction"],
+  doc = "x += OPER_i a(i)"
+}
+
+// An example of vector reductions.
+module {
+
+  func @sum_reduction_i32(%arga: tensor<32xi32, #SV>,
+                          %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #SV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = addi %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func @sum_reduction_f32(%arga: tensor<32xf32, #SV>,
+                          %argx: tensor<f32>) -> tensor<f32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xf32, #SV>)
+      outs(%argx: tensor<f32>) {
+        ^bb(%a: f32, %x: f32):
+          %0 = addf %x, %a : f32
+          linalg.yield %0 : f32
+    } -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func @prod_reduction_i32(%arga: tensor<32xi32, #DV>,
+                           %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #DV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = muli %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func @prod_reduction_f32(%arga: tensor<32xf32, #DV>,
+                           %argx: tensor<f32>) -> tensor<f32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xf32, #DV>)
+      outs(%argx: tensor<f32>) {
+        ^bb(%a: f32, %x: f32):
+          %0 = mulf %x, %a : f32
+          linalg.yield %0 : f32
+    } -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func @and_reduction_i32(%arga: tensor<32xi32, #DV>,
+                          %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #DV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = and %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func @or_reduction_i32(%arga: tensor<32xi32, #SV>,
+                         %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #SV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = or %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func @xor_reduction_i32(%arga: tensor<32xi32, #SV>,
+                          %argx: tensor<i32>) -> tensor<i32> {
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #SV>)
+      outs(%argx: tensor<i32>) {
+        ^bb(%a: i32, %x: i32):
+          %0 = xor %x, %a : i32
+          linalg.yield %0 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+  func @dump_i32(%arg0 : tensor<i32>) {
+    %m = memref.buffer_cast %arg0 : memref<i32>
+    %v = memref.load %m[] : memref<i32>
+    vector.print %v : i32
+    return
+  }
+
+  func @dump_f32(%arg0 : tensor<f32>) {
+    %m = memref.buffer_cast %arg0 : memref<f32>
+    %v = memref.load %m[] : memref<f32>
+    vector.print %v : f32
+    return
+  }
+
+  func @entry() {
+    %ri = constant dense< 7 > : tensor<i32>
+    %rf = constant dense< 2.0 > : tensor<f32>
+
+    %c_0_i32 = constant dense<[
+        0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0,
+        0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0
+    ]> : tensor<32xi32>
+
+    %c_0_f32 = constant dense<[
+        0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0, 2.5, 0.0, 0.0, 0.0,
+        2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 9.0
+    ]> : tensor<32xf32>
+
+    %c_1_i32 = constant dense<[
+        1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 3
+    ]> : tensor<32xi32>
+
+    %c_1_f32 = constant dense<[
+        1.0, 1.0, 1.0, 3.5, 1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0
+    ]> : tensor<32xf32>
+
+    // Convert constants to annotated tensors.
+    %sparse_input_i32 = sparse_tensor.convert %c_0_i32
+      : tensor<32xi32> to tensor<32xi32, #SV>
+    %sparse_input_f32 = sparse_tensor.convert %c_0_f32
+      : tensor<32xf32> to tensor<32xf32, #SV>
+    %dense_input_i32 = sparse_tensor.convert %c_1_i32
+      : tensor<32xi32> to tensor<32xi32, #DV>
+    %dense_input_f32 = sparse_tensor.convert %c_1_f32
+      : tensor<32xf32> to tensor<32xf32, #DV>
+
+    // Call the kernels.
+    %0 = call @sum_reduction_i32(%sparse_input_i32, %ri)
+       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
+    %1 = call @sum_reduction_f32(%sparse_input_f32, %rf)
+       : (tensor<32xf32, #SV>, tensor<f32>) -> tensor<f32>
+    %2 = call @prod_reduction_i32(%dense_input_i32, %ri)
+       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
+    %3 = call @prod_reduction_f32(%dense_input_f32, %rf)
+       : (tensor<32xf32, #DV>, tensor<f32>) -> tensor<f32>
+    %4 = call @and_reduction_i32(%dense_input_i32, %ri)
+       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
+    %5 = call @or_reduction_i32(%sparse_input_i32, %ri)
+       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
+    %6 = call @xor_reduction_i32(%sparse_input_i32, %ri)
+       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
+
+    // Verify results.
+    //
+    // CHECK: 26
+    // CHECK: 27.5
+    // CHECK: 3087
+    // CHECK: 168
+    // CHECK: 1
+    // CHECK: 15
+    // CHECK: 10
+    //
+    call @dump_i32(%0) : (tensor<i32>) -> ()
+    call @dump_f32(%1) : (tensor<f32>) -> ()
+    call @dump_i32(%2) : (tensor<i32>) -> ()
+    call @dump_f32(%3) : (tensor<f32>) -> ()
+    call @dump_i32(%4) : (tensor<i32>) -> ()
+    call @dump_i32(%5) : (tensor<i32>) -> ()
+    call @dump_i32(%6) : (tensor<i32>) -> ()
+
+    return
+  }
+}
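
Note for reviewers: the initialization scheme used by genReductionInit above can be checked independently of MLIR. The standalone C++ sketch below is hypothetical test code, not part of this patch; it only emulates the scheme for a product reduction with plain arrays in place of vector registers. The incoming scalar accumulator goes into one lane, the remaining lanes hold the multiplicative identity, and the final horizontal reduction then folds the accumulator in exactly once.

// Standalone sanity check (hypothetical, not part of the patch): emulates the
// reduction-vector initialization scheme of genReductionInit for kProduct.
#include <array>
#include <cstdint>
#include <iostream>

constexpr std::size_t VL = 8; // simulated vector length

// Horizontal product reduction over the simulated vector register.
static int64_t horizontalMul(const std::array<int64_t, VL> &v) {
  int64_t acc = 1;
  for (int64_t e : v)
    acc *= e;
  return acc;
}

int main() {
  const int64_t r = 7; // incoming scalar accumulator (like %ri in the test)
  // Initialize reduction vector to: | 1 | .. | 1 | r |
  std::array<int64_t, VL> red;
  red.fill(1);
  red[0] = r;
  // Simulated vectorized loop body: elementwise multiply of one data chunk.
  const std::array<int64_t, VL> chunk = {2, 3, 1, 1, 5, 1, 1, 1};
  for (std::size_t i = 0; i < VL; ++i)
    red[i] *= chunk[i];
  // Horizontal reduction folds r in exactly once: 7 * 2 * 3 * 5 == 210.
  std::cout << horizontalMul(red) << " == " << r * 2 * 3 * 5 << "\n";
  return 0;
}

For kSum and kXor the identity lanes would hold 0 instead of 1, and for kAnd and kOr the whole vector can simply be filled with r, since x & x == x and x | x == x make the duplicate occurrences harmless.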