diff --git a/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp b/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp
@@ -132,6 +132,12 @@
       name = "sparseValuesF64";
     else if (eltType.isF32())
       name = "sparseValuesF32";
+    else if (eltType.isInteger(32))
+      name = "sparseValuesI32";
+    else if (eltType.isInteger(16))
+      name = "sparseValuesI16";
+    else if (eltType.isInteger(8))
+      name = "sparseValuesI8";
     else
       return failure();
     rewriter.replaceOpWithNewOp<CallOp>(
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
@@ -837,11 +837,19 @@
   assert(codegen.curVecLength == 1);
   codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain
   unsigned lhs = op.getNumShapedOperands() - 1;
-  if (red.getType().isa<VectorType>()) {
+  if (auto vtp = red.getType().dyn_cast<VectorType>()) {
     // TODO: assumes + reductions for now
+    StringAttr kind = rewriter.getStringAttr("add");
     Value ld = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
-    red = rewriter.create<vector::ReductionOp>(
-        op.getLoc(), ld.getType(), rewriter.getStringAttr("add"), red, ld);
+    // Integer reductions don't accept an accumulator.
+    if (vtp.getElementType().isa<IntegerType>()) {
+      red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
+                                                 kind, red, ValueRange{});
+      red = rewriter.create<AddIOp>(op.getLoc(), red, ld);
+    } else {
+      red = rewriter.create<vector::ReductionOp>(op.getLoc(), ld.getType(),
+                                                 kind, red, ld);
+    }
   }
   genTensorStore(merger, codegen, rewriter, op, lhs, red);
 }
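Note on the Sparsification.cpp change above: as the new comment states, the integer form of the vector reduction emitted here takes no accumulator operand, so the scalar accumulator is reduced into the result with a separate integer add. The following scalar model of the two epilogues is an illustrative sketch only (not part of the patch, and not the generated code; names are made up):

// Scalar model of the vectorized reduction epilogue (illustrative only).
#include <cstdint>

// Floating-point path: the running accumulator 'x' is folded into the reduction.
float reduceEpilogueF32(const float *vec, int n, float x) {
  float r = x;
  for (int i = 0; i < n; ++i)
    r += vec[i];
  return r;
}

// Integer path: reduce without an accumulator, then add 'x' back explicitly.
int32_t reduceEpilogueI32(const int32_t *vec, int n, int32_t x) {
  int32_t r = 0;
  for (int i = 0; i < n; ++i)
    r += vec[i];
  return r + x;
}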
diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -127,6 +127,9 @@
   // Primary storage.
   virtual void getValues(std::vector<double> **) { fatal("valf64"); }
   virtual void getValues(std::vector<float> **) { fatal("valf32"); }
+  virtual void getValues(std::vector<int32_t> **) { fatal("vali32"); }
+  virtual void getValues(std::vector<int16_t> **) { fatal("vali16"); }
+  virtual void getValues(std::vector<int8_t> **) { fatal("vali8"); }
 
   virtual ~SparseTensorStorageBase() {}
@@ -453,64 +456,58 @@
 // implementation of a bufferized SparseTensor in MLIR. This could be replaced
 // by actual codegen in MLIR.
 //
+// Because we cannot use C++ templates with C linkage, some macro magic is used
+// to generate implementations for all required type combinations that can be
+// called from MLIR generated code.
+//
 //===----------------------------------------------------------------------===//
 
-// Cannot use templates with C linkage.
-
-struct MemRef1DU64 {
-  const uint64_t *base;
-  const uint64_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
-
-struct MemRef1DU32 {
-  const uint32_t *base;
-  const uint32_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define TEMPLATE(NAME, TYPE) \
+  struct NAME { \
+    const TYPE *base; \
+    const TYPE *data; \
+    uint64_t off; \
+    uint64_t sizes[1]; \
+    uint64_t strides[1]; \
+  }
 
-struct MemRef1DU16 {
-  const uint16_t *base;
-  const uint16_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define CASE(p, i, v, P, I, V) \
+  if (ptrTp == (p) && indTp == (i) && valTp == (v)) \
+    return newSparseTensor<P, I, V>(filename, sparsity, asize)
 
-struct MemRef1DU8 {
-  const uint8_t *base;
-  const uint8_t *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define IMPL1(RET, NAME, TYPE, LIB) \
+  RET NAME(void *tensor) { \
+    std::vector<TYPE> *v; \
+    static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v); \
+    return {v->data(), v->data(), 0, {v->size()}, {1}}; \
+  }
 
-struct MemRef1DF64 {
-  const double *base;
-  const double *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+#define IMPL2(RET, NAME, TYPE, LIB) \
+  RET NAME(void *tensor, uint64_t d) { \
+    std::vector<TYPE> *v; \
+    static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v, d); \
+    return {v->data(), v->data(), 0, {v->size()}, {1}}; \
+  }
 
-struct MemRef1DF32 {
-  const float *base;
-  const float *data;
-  uint64_t off;
-  uint64_t sizes[1];
-  uint64_t strides[1];
-};
+TEMPLATE(MemRef1DU64, uint64_t);
+TEMPLATE(MemRef1DU32, uint32_t);
+TEMPLATE(MemRef1DU16, uint16_t);
+TEMPLATE(MemRef1DU8, uint8_t);
+TEMPLATE(MemRef1DI32, int32_t);
+TEMPLATE(MemRef1DI16, int16_t);
+TEMPLATE(MemRef1DI8, int8_t);
+TEMPLATE(MemRef1DF64, double);
+TEMPLATE(MemRef1DF32, float);
 
 enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
-enum PrimaryTypeEnum : uint64_t { kF64 = 1, kF32 = 2 };
-#define CASE(p, i, v, P, I, V) \
-  if (ptrTp == (p) && indTp == (i) && valTp == (v)) \
-    return newSparseTensor<P, I, V>(filename, sparsity, asize)
+enum PrimaryTypeEnum : uint64_t {
+  kF64 = 1,
+  kF32 = 2,
+  kI32 = 3,
+  kI16 = 4,
+  kI8 = 5
+};
 
 void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
                       uint64_t asize, uint64_t astride, uint64_t ptrTp,
@@ -534,6 +531,17 @@
   CASE(kU16, kU16, kF32, uint16_t, uint16_t, float);
   CASE(kU8, kU8, kF32, uint8_t, uint8_t, float);
 
+  // Integral matrices with low overhead storage.
+  CASE(kU32, kU32, kI32, uint32_t, uint32_t, int32_t);
+  CASE(kU32, kU32, kI16, uint32_t, uint32_t, int16_t);
+  CASE(kU32, kU32, kI8, uint32_t, uint32_t, int8_t);
+  CASE(kU16, kU16, kI32, uint16_t, uint16_t, int32_t);
+  CASE(kU16, kU16, kI16, uint16_t, uint16_t, int16_t);
+  CASE(kU16, kU16, kI8, uint16_t, uint16_t, int8_t);
+  CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t);
+  CASE(kU8, kU8, kI16, uint8_t, uint8_t, int16_t);
+  CASE(kU8, kU8, kI8, uint8_t, uint8_t, int8_t);
+
   // Unsupported case (add above if needed).
fputs("unsupported combination of types\n", stderr); exit(1); @@ -545,70 +553,29 @@ return static_cast(tensor)->getDimSize(d); } -MemRef1DU64 sparsePointers64(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU32 sparsePointers32(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU16 sparsePointers16(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU8 sparsePointers8(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getPointers(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU64 sparseIndices64(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU32 sparseIndices32(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU16 sparseIndices16(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DU8 sparseIndices8(void *tensor, uint64_t d) { - std::vector *v; - static_cast(tensor)->getIndices(&v, d); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DF64 sparseValuesF64(void *tensor) { - std::vector *v; - static_cast(tensor)->getValues(&v); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} - -MemRef1DF32 sparseValuesF32(void *tensor) { - std::vector *v; - static_cast(tensor)->getValues(&v); - return {v->data(), v->data(), 0, {v->size()}, {1}}; -} +IMPL2(MemRef1DU64, sparsePointers64, uint64_t, getPointers) +IMPL2(MemRef1DU32, sparsePointers32, uint32_t, getPointers) +IMPL2(MemRef1DU16, sparsePointers16, uint16_t, getPointers) +IMPL2(MemRef1DU8, sparsePointers8, uint8_t, getPointers) +IMPL2(MemRef1DU64, sparseIndices64, uint64_t, getIndices) +IMPL2(MemRef1DU32, sparseIndices32, uint32_t, getIndices) +IMPL2(MemRef1DU16, sparseIndices16, uint16_t, getIndices) +IMPL2(MemRef1DU8, sparseIndices8, uint8_t, getIndices) +IMPL1(MemRef1DF64, sparseValuesF64, double, getValues) +IMPL1(MemRef1DF32, sparseValuesF32, float, getValues) +IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues) +IMPL1(MemRef1DI16, sparseValuesI16, int16_t, getValues) +IMPL1(MemRef1DI8, sparseValuesI8, int8_t, getValues) void delSparseTensor(void *tensor) { delete static_cast(tensor); } +#undef TEMPLATE +#undef CASE +#undef IMPL1 +#undef IMPL2 + } // extern "C" #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS diff --git a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir --- a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir @@ -54,18 +54,18 @@ // a sparse matrix A with a dense vector b into a dense vector x. 
diff --git a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir
--- a/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Sparse/CPU/sparse_matvec.mlir
@@ -54,18 +54,18 @@
 // a sparse matrix A with a dense vector b into a dense vector x.
 //
 func @kernel_matvec(%argA: !SparseTensor,
-                    %argb: tensor<?xf32>,
-                    %argx: tensor<?xf32>) -> tensor<?xf32> {
-  %arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xf32>
+                    %argb: tensor<?xi32>,
+                    %argx: tensor<?xi32>) -> tensor<?xi32> {
+  %arga = linalg.sparse_tensor %argA : !SparseTensor to tensor<?x?xi32>
   %0 = linalg.generic #matvec
-    ins(%arga, %argb: tensor<?x?xf32>, tensor<?xf32>)
-    outs(%argx: tensor<?xf32>) {
-    ^bb(%a: f32, %b: f32, %x: f32):
-      %0 = mulf %a, %b : f32
-      %1 = addf %x, %0 : f32
-      linalg.yield %1 : f32
-  } -> tensor<?xf32>
-  return %0 : tensor<?xf32>
+    ins(%arga, %argb: tensor<?x?xi32>, tensor<?xi32>)
+    outs(%argx: tensor<?xi32>) {
+    ^bb(%a: i32, %b: i32, %x: i32):
+      %0 = muli %a, %b : i32
+      %1 = addi %x, %0 : i32
+      linalg.yield %1 : i32
+  } -> tensor<?xi32>
+  return %0 : tensor<?xi32>
 }
 
 //
@@ -79,7 +79,7 @@
 // Main driver that reads matrix from file and calls the sparse kernel.
 //
 func @entry() {
-  %f0 = constant 0.0 : f32
+  %i0 = constant 0 : i32
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c2 = constant 2 : index
@@ -89,51 +89,51 @@
   // Mark inner dimension of the matrix as sparse and encode the
   // storage scheme types (this must match the metadata in the
   // alias above and compiler switches). In this case, we test
-  // that 8-bit indices and pointers work correctly.
+  // that 8-bit indices and pointers work correctly on a matrix
+  // with i32 elements.
   %annotations = memref.alloc(%c2) : memref<?xi1>
   %sparse = constant true
   %dense = constant false
   memref.store %dense, %annotations[%c0] : memref<?xi1>
   memref.store %sparse, %annotations[%c1] : memref<?xi1>
   %u8 = constant 4 : index
-  %f32 = constant 2 : index
+  %i32 = constant 3 : index
 
   // Read the sparse matrix from file, construct sparse storage.
   %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
-  %a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %f32)
+  %a = call @newSparseTensor(%fileName, %annotations, %u8, %u8, %i32)
     : (!Filename, memref<?xi1>, index, index, index) -> (!SparseTensor)
 
   // Initialize dense vectors.
-  %bdata = memref.alloc(%c256) : memref<?xf32>
-  %xdata = memref.alloc(%c4) : memref<?xf32>
+  %bdata = memref.alloc(%c256) : memref<?xi32>
+  %xdata = memref.alloc(%c4) : memref<?xi32>
   scf.for %i = %c0 to %c256 step %c1 {
     %k = addi %i, %c1 : index
-    %l = index_cast %k : index to i32
-    %f = sitofp %l : i32 to f32
-    memref.store %f, %bdata[%i] : memref<?xf32>
+    %j = index_cast %k : index to i32
+    memref.store %j, %bdata[%i] : memref<?xi32>
   }
   scf.for %i = %c0 to %c4 step %c1 {
-    memref.store %f0, %xdata[%i] : memref<?xf32>
+    memref.store %i0, %xdata[%i] : memref<?xi32>
  }
-  %b = memref.tensor_load %bdata : memref<?xf32>
-  %x = memref.tensor_load %xdata : memref<?xf32>
+  %b = memref.tensor_load %bdata : memref<?xi32>
+  %x = memref.tensor_load %xdata : memref<?xi32>
 
   // Call kernel.
   %0 = call @kernel_matvec(%a, %b, %x)
-    : (!SparseTensor, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+    : (!SparseTensor, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
 
   // Print the result for verification.
   //
-  // CHECK: ( 1659, 1534, 21, 18315 )
+  // CHECK: ( 889, 1514, -21, -3431 )
   //
-  %m = memref.buffer_cast %0 : memref<?xf32>
-  %v = vector.transfer_read %m[%c0], %f0: memref<?xf32>, vector<4xf32>
-  vector.print %v : vector<4xf32>
+  %m = memref.buffer_cast %0 : memref<?xi32>
+  %v = vector.transfer_read %m[%c0], %i0: memref<?xi32>, vector<4xi32>
+  vector.print %v : vector<4xi32>
 
   // Release the resources.
   call @delSparseTensor(%a) : (!SparseTensor) -> ()
-  memref.dealloc %bdata : memref<?xf32>
-  memref.dealloc %xdata : memref<?xf32>
+  memref.dealloc %bdata : memref<?xi32>
+  memref.dealloc %xdata : memref<?xi32>
   return
 }
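In the driver above, the index constants %u8 = 4 and %i32 = 3 correspond to OverheadTypeEnum::kU8 and PrimaryTypeEnum::kI32 in SparseUtils.cpp, so the runtime call resolves through one of the new integral CASE entries. Expanded by hand for illustration only (a fragment of the dispatch inside newSparseTensor, not standalone code and not part of the patch):

// Hand expansion of CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t), the branch
// this test exercises: 8-bit pointers and indices over i32 values.
if (ptrTp == (kU8) && indTp == (kU8) && valTp == (kI32))
  return newSparseTensor<uint8_t, uint8_t, int32_t>(filename, sparsity, asize);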
diff --git a/mlir/test/Integration/data/wide.mtx b/mlir/test/Integration/data/wide.mtx
--- a/mlir/test/Integration/data/wide.mtx
+++ b/mlir/test/Integration/data/wide.mtx
@@ -4,20 +4,20 @@
 % see https://math.nist.gov/MatrixMarket
 %
 4 256 17
-1 1 1.0
-1 127 2.0
-1 128 3.0
-1 255 4.0
-2 2 5.0
-2 254 6.0
-3 3 7.0
-4 1 8.0
-4 2 9.0
-4 4 10.0
-4 99 11.0
-4 127 12.0
-4 128 13.0
-4 129 14.0
-4 250 15.0
-4 254 16.0
-4 256 17.0
+1 1 -1
+1 127 2
+1 128 -3
+1 255 4
+2 2 -5
+2 254 6
+3 3 -7
+4 1 8
+4 2 -9
+4 4 10
+4 99 -11
+4 127 12
+4 128 -13
+4 129 14
+4 250 -15
+4 254 16
+4 256 -17
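For reviewers who want to double-check the updated CHECK line in sparse_matvec.mlir, the expected values follow directly from the integer entries above and the test's dense vector, which stores b[j] = j at 1-based column j. A small standalone sketch (not part of the patch) that recomputes the result:

// Recompute the expected matvec output from the wide.mtx entries above,
// using b[col] == col as initialized by the test driver.
#include <cstdint>
#include <cstdio>

int main() {
  // (row, col, value) triples copied from wide.mtx, 1-based indices.
  const int nnz[17][3] = {
      {1, 1, -1},   {1, 127, 2},  {1, 128, -3},  {1, 255, 4},  {2, 2, -5},
      {2, 254, 6},  {3, 3, -7},   {4, 1, 8},     {4, 2, -9},   {4, 4, 10},
      {4, 99, -11}, {4, 127, 12}, {4, 128, -13}, {4, 129, 14}, {4, 250, -15},
      {4, 254, 16}, {4, 256, -17}};
  int32_t x[4] = {0, 0, 0, 0};
  for (const auto &e : nnz)
    x[e[0] - 1] += e[2] * e[1]; // accumulate value * b[col], with b[col] == col
  std::printf("( %d, %d, %d, %d )\n", x[0], x[1], x[2], x[3]);
  // Prints: ( 889, 1514, -21, -3431 ), matching the new CHECK line.
  return 0;
}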