diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -230,6 +230,7 @@ Value v1); private: + bool isZero(unsigned e) const; bool maybeZero(unsigned e) const; bool isInvariant(unsigned e) const; Type inferType(unsigned e, Value src); diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -489,6 +489,11 @@ // ---+---+---+ ---+---+---+ // !x | 0 | y | !x | 0 |-y | // x | x |x+y| x | x |x-y| + // + // TODO: remove this zero "folding" in favor of external pass into linalg + // + if (isZero(tensorExps[e].children.e1)) + return buildLattices(tensorExps[e].children.e0, i); return takeDisj(kind, // take binary disjunction buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); @@ -511,6 +516,18 @@ return buildTensorExp(op, yield->getOperand(0)); } +/// Only returns true if we are certain this is a zero. +bool Merger::isZero(unsigned e) const { + if (tensorExps[e].kind == kInvariant) { + if (auto c = tensorExps[e].val.getDefiningOp<ConstantIntOp>()) + return c.getValue() == 0; + if (auto c = tensorExps[e].val.getDefiningOp<ConstantFloatOp>()) + return c.getValue().isZero(); + } + return false; +} + +/// Only returns false if we are certain this is a nonzero. 
bool Merger::maybeZero(unsigned e) const { if (tensorExps[e].kind == kInvariant) { if (auto c = tensorExps[e].val.getDefiningOp<ConstantIntOp>()) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -0,0 +1,76 @@ +// RUN: mlir-opt %s \ +// RUN: --linalg-generalize-named-ops \ +// RUN: --sparsification --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm \ +// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> + +// An example of a quantized sparse matmul. With the zero offset for the +// sparse input, the sparse compiler generates very efficient code for the +// x(i,j) += (ext(a(i,k)) - 2) * ext(b(k,j)) +// operation. 
+module { + + func @quantized_matmul(%input1: tensor<5x3xi8>, + %input2: tensor<3x6xi8, #DCSR>, + %output: tensor<5x6xi32>) -> tensor<5x6xi32> { + %c0 = constant 0 : i32 + %c2 = constant 2 : i32 + %0 = linalg.quantized_matmul + ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) + outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32> + return %0: tensor<5x6xi32> + } + + func @entry() { + %c0 = constant 0 : index + %i0 = constant 0 : i32 + + %input1 = constant dense<[ + [ -128, 3, 127 ], + [ 0, 0, 0 ], + [ 11, 1, 0 ], + [ 0, 5, -1 ], + [ 13, 0, 3 ] + ]> : tensor<5x3xi8> + + %input2 = constant dense<[ + [ 127, 0, -128, 0, 0, 3 ], + [ 0, 0, 0, 0, 0, 0 ], + [ 0, 0, 0, 100, 10, 0 ] + ]> : tensor<3x6xi8> + + %sparse_input2 = sparse_tensor.convert %input2 : tensor<3x6xi8> to tensor<3x6xi8, #DCSR> + + // Call the kernel. + %output = constant dense<0> : tensor<5x6xi32> + %0 = call @quantized_matmul(%input1, %sparse_input2, %output) + : (tensor<5x3xi8>, + tensor<3x6xi8, #DCSR>, + tensor<5x6xi32>) -> tensor<5x6xi32> + + // + // Verify the output. + // + // CHECK: ( ( -16510, 0, 16640, 12500, 1250, -390 ), + // CHECK-SAME: ( -254, 0, 256, -200, -20, -6 ), + // CHECK-SAME: ( 1143, 0, -1152, -200, -20, 27 ), + // CHECK-SAME: ( -254, 0, 256, -300, -30, -6 ), + // CHECK-SAME: ( 1397, 0, -1408, 100, 10, 33 ) ) + // + %m = memref.buffer_cast %0 : memref<5x6xi32> + %v = vector.transfer_read %m[%c0, %c0], %i0 + : memref<5x6xi32>, vector<5x6xi32> + vector.print %v : vector<5x6xi32> + + return + } +}