diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -104,6 +104,28 @@
   DO(C64, complex64) \
   DO(C32, complex32)
 
+// This x-macro calls `DO` on every pair of overhead and `V` types, given a
+// fixed overhead type `(INAME, I)`.
+#define MLIR_SPARSETENSOR_O_FOREVERY_V(INAME, I, DO) \
+  DO(INAME, I, F64, double) \
+  DO(INAME, I, F32, float) \
+  DO(INAME, I, F16, f16) \
+  DO(INAME, I, BF16, bf16) \
+  DO(INAME, I, I64, int64_t) \
+  DO(INAME, I, I32, int32_t) \
+  DO(INAME, I, I16, int16_t) \
+  DO(INAME, I, I8, int8_t) \
+  DO(INAME, I, C64, complex64) \
+  DO(INAME, I, C32, complex32)
+
+// This x-macro calls its argument on every pair of overhead and `V` types.
+#define MLIR_SPARSETENSOR_FOREVERY_O_V(DO) \
+  MLIR_SPARSETENSOR_O_FOREVERY_V(64, uint64_t, DO) \
+  MLIR_SPARSETENSOR_O_FOREVERY_V(32, uint32_t, DO) \
+  MLIR_SPARSETENSOR_O_FOREVERY_V(16, uint16_t, DO) \
+  MLIR_SPARSETENSOR_O_FOREVERY_V(8, uint8_t, DO) \
+  MLIR_SPARSETENSOR_O_FOREVERY_V(0, index_type, DO)
+
 constexpr bool isFloatingPrimaryType(PrimaryType valTy) {
   return PrimaryType::kF64 <= valTy && valTy <= PrimaryType::kBF16;
 }
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -249,6 +249,14 @@
     return tensor;
   }
 
+  /// Reads the COO tensor and stores the indices, values, and actual nnz
+  /// into the given buffers, which can hold at most `maxNnz` elements.
+  /// Returns whether the COO elements are sorted.
+  template <typename I, typename V>
+  bool readCOO(I *indices, V *values, uint64_t *actualNnz,
+               const uint64_t *dim2lvl, uint64_t maxNnz);
+
 private:
   /// Attempts to read a line from the file. Is private because there's
   /// no reason for client code to call it.
@@ -286,6 +294,13 @@
   void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
                    SparseTensorCOO<V> *lvlCOO);
 
+  /// The internal implementation of `readCOO`. We template over
+  /// `IsPattern` and `IsSymmetric` in order to perform LICM without
+  /// needing to duplicate the source code.
+  template <typename I, typename V, bool IsPattern, bool IsSymmetric>
+  bool readCOOLoop(I *indices, V *values, uint64_t *actualNnz,
+                   const uint64_t *dim2lvl, uint64_t maxNnz);
+
   /// Reads the MME header of a general sparse matrix of type real.
   void readMMEHeader();
 
@@ -365,6 +380,86 @@
   }
 }
 
+template <typename I, typename V>
+bool SparseTensorReader::readCOO(I *indices, V *values, uint64_t *actualNnz,
+                                 const uint64_t *dim2lvl, uint64_t maxNnz) {
+  assert(isValid() && "Attempt to readCOO() before readHeader()");
+  // Do some manual LICM, to avoid assertions in the for-loop.
+  const bool IsPattern = isPattern();
+  const bool IsSymmetric = (isSymmetric() && getRank() == 2);
+  bool isSorted;
+  if (IsPattern && IsSymmetric)
+    isSorted = readCOOLoop<I, V, true, true>(indices, values, actualNnz,
+                                             dim2lvl, maxNnz);
+  else if (IsPattern)
+    isSorted = readCOOLoop<I, V, true, false>(indices, values, actualNnz,
+                                              dim2lvl, maxNnz);
+  else if (IsSymmetric)
+    isSorted = readCOOLoop<I, V, false, true>(indices, values, actualNnz,
+                                              dim2lvl, maxNnz);
+  else
+    isSorted = readCOOLoop<I, V, false, false>(indices, values, actualNnz,
+                                               dim2lvl, maxNnz);
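+  // Each branch above instantiates its own copy of the loop, so the
+  // pattern/symmetric tests are evaluated once here rather than once per
+  // nonzero element.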
+  // Close the file and return isSorted.
+  closeFile();
+  return isSorted;
+}
+
+template <typename I, typename V, bool IsPattern, bool IsSymmetric>
+bool SparseTensorReader::readCOOLoop(I *indices, V *values, uint64_t *actualNnz,
+                                     const uint64_t *dim2lvl, uint64_t maxNnz) {
+  const uint64_t rank = getRank();
+  const uint64_t nnz = getNNZ();
+  detail::PermutationRef d2l(rank, dim2lvl);
+  std::vector<uint64_t> dimInd(rank);
+  // Assume unsorted for symmetric, since the mirrored elements are
+  // appended out of order.
+  bool isSorted = !IsSymmetric;
+  // We inline `readCOOElement` here in order to avoid redundant
+  // assertions, since they're guaranteed by the call to `isValid()`
+  // and the construction of `dimInd` above.
+  I *pIndices = indices;
+  V *pValues = values;
+  char *linePtr;
+  auto getOneElement = [&]() {
+    linePtr = readCOOIndices(dimInd.data());
+    d2l.pushforward(rank, dimInd.data(), pIndices);
+    *pValues = detail::readCOOValue<V, IsPattern>(&linePtr);
+  };
+  // We currently choose to deal with symmetric matrices by fully
+  // constructing them. In the future, we may want to make symmetry
+  // implicit for storage reasons.
+  auto addSymmetricDup = [&](uint64_t &n) {
+    if constexpr (IsSymmetric)
+      if (pIndices[0] != pIndices[1]) {
+        assert(n < maxNnz);
+        pIndices[2] = pIndices[1];
+        pIndices[3] = pIndices[0];
+        pValues[1] = pValues[0];
+        n++;
+      }
+  };
+  getOneElement();
+  uint64_t n = 1;
+  // Mirror the first element as well; the loop below only mirrors the
+  // elements it reads itself.
+  addSymmetricDup(n);
+  for (uint64_t k = 1; k < nnz; ++k) {
+    assert(n < maxNnz);
+    pIndices = indices + (n * rank);
+    pValues = values + n;
+    getOneElement();
+    if (isSorted) {
+      I *prev = pIndices - rank;
+      for (uint64_t d = 0; d < rank; ++d) {
+        if (prev[d] != pIndices[d]) {
+          isSorted = (prev[d] < pIndices[d]);
+          break;
+        }
+      }
+    }
+    n++;
+    addSymmetricDup(n);
+  }
+
+  *actualNnz = n;
+  return isSorted;
+}
+
 /// Writes the sparse tensor to `filename` in extended FROSTT format.
 template <typename V>
 inline void writeExtFROSTT(const SparseTensorCOO<V> &coo,
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
@@ -283,6 +283,19 @@
 MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT)
 #undef DECL_GETNEXT
 
+/// Reads the sparse tensor and stores the indices, values, and actual nnz
+/// into the given memrefs. Returns whether the COO elements are sorted.
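+/// The indices memref must provide `rank` entries per element: the
+/// implementation asserts `size(iref) == rank * size(vref)` and uses the
+/// size of `vref` as the buffer capacity (`maxNnz`) passed to `readCOO`.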
+#define DECL_GETNEXT(INAME, I, VNAME, V) \
+  MLIR_CRUNNERUTILS_EXPORT bool \
+  _mlir_ciface_getSparseTensorReaderRead##INAME##VNAME( \
+      void *p, StridedMemRefType<I, 1> *iref, \
+      StridedMemRefType<V, 1> *vref, \
+      StridedMemRefType<index_type, 0> *nref, \
+      StridedMemRefType<index_type, 1> *dim2lvlRef);
+MLIR_SPARSETENSOR_FOREVERY_O_V(DECL_GETNEXT)
+#undef DECL_GETNEXT
+
 using SparseTensorWriter = std::ostream;
 
 /// Creates a SparseTensorWriter for outputting a sparse tensor to a file with
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -631,6 +631,35 @@
 MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT)
 #undef IMPL_GETNEXT
 
+#define IMPL_GETNEXT(INAME, I, VNAME, V) \
+  bool _mlir_ciface_getSparseTensorReaderRead##INAME##VNAME( \
+      void *p, StridedMemRefType<I, 1> *iref, \
+      StridedMemRefType<V, 1> *vref, \
+      StridedMemRefType<index_type, 0> *nref, \
+      StridedMemRefType<index_type, 1> *dim2lvlRef) { \
+    assert(p && iref && vref && nref && dim2lvlRef); \
+    auto &reader = *static_cast<SparseTensorReader *>(p); \
+    ASSERT_NO_STRIDE(iref); \
+    ASSERT_NO_STRIDE(vref); \
+    ASSERT_NO_STRIDE(dim2lvlRef); \
+    const uint64_t is = MEMREF_GET_USIZE(iref); \
+    const uint64_t vs = MEMREF_GET_USIZE(vref); \
+    const uint64_t rank = reader.getRank(); \
+    assert(is == vs * rank); \
+    const uint64_t ps = MEMREF_GET_USIZE(dim2lvlRef); \
+    assert(ps == rank); \
+    (void)is; \
+    (void)vs; \
+    (void)rank; \
+    (void)ps; \
+    I *indices = MEMREF_GET_PAYLOAD(iref); \
+    V *values = MEMREF_GET_PAYLOAD(vref); \
+    index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \
+    index_type *n = MEMREF_GET_PAYLOAD(nref); \
+    return reader.readCOO(indices, values, n, dim2lvl, vs); \
+  }
+MLIR_SPARSETENSOR_FOREVERY_O_V(IMPL_GETNEXT)
+#undef IMPL_GETNEXT
+
 void *_mlir_ciface_newSparseTensorFromReader(
     void *p, StridedMemRefType<index_type, 1> *lvlSizesRef,
     StridedMemRefType<DimLevelType, 1> *lvlTypesRef,
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
@@ -43,6 +43,9 @@
   func.func private @getSparseTensorReaderIsSymmetric(!TensorReader) -> (i1)
   func.func private @copySparseTensorReaderDimSizes(!TensorReader, memref<?xindex>)
     -> () attributes { llvm.emit_c_interface }
+  func.func private @getSparseTensorReaderRead0F32(!TensorReader,
+    memref<?xindex>, memref<?xf32>, memref<index>, memref<?xindex>)
+    -> (i1) attributes { llvm.emit_c_interface }
   func.func private @getSparseTensorReaderNextF32(!TensorReader,
     memref<?xindex>, memref<f32>) -> () attributes { llvm.emit_c_interface }
@@ -60,6 +63,14 @@
     return
   }
 
+  func.func @dumpi2(%arg0: memref<?xindex, strided<[?], offset: ?>>) {
+    %c0 = arith.constant 0 : index
+    %v = vector.transfer_read %arg0[%c0], %c0 :
+      memref<?xindex, strided<[?], offset: ?>>, vector<17xindex>
+    vector.print %v : vector<17xindex>
+    return
+  }
+
   func.func @dumpf(%arg0: memref<?xf32>) {
     %c0 = arith.constant 0 : index
     %d0 = arith.constant 0.0 : f32
@@ -70,39 +81,33 @@
 
   // Returns the indices and values of the tensor.
   func.func @readTensorFile(%tensor: !TensorReader)
-    -> (memref<?xindex>, memref<?xindex>, memref<?xf32>) {
+    -> (memref<?xindex>, memref<?xf32>, index, i1) {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
     %rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index
     %nnz = call @getSparseTensorReaderNNZ(%tensor) : (!TensorReader) -> index
 
     // Assume rank == 2.
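+    // The coordinates are packed into one flat buffer: element i occupies
+    // xs[2 * i] (dimension 0) and xs[2 * i + 1] (dimension 1).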
-    %x0s = memref.alloc(%nnz) : memref<?xindex>
-    %x1s = memref.alloc(%nnz) : memref<?xindex>
+    %isize = arith.muli %c2, %nnz : index
+    %xs = memref.alloc(%isize) : memref<?xindex>
     %vs = memref.alloc(%nnz) : memref<?xf32>
-    %indices = memref.alloc(%rank) : memref<?xindex>
-    %value = memref.alloca() : memref<f32>
-    scf.for %i = %c0 to %nnz step %c1 {
-      func.call @getSparseTensorReaderNextF32(%tensor, %indices, %value)
-        : (!TensorReader, memref<?xindex>, memref<f32>) -> ()
-      // TODO: can we use memref.subview to avoid the need for the %value
-      // buffer?
-      %v = memref.load %value[] : memref<f32>
-      memref.store %v, %vs[%i] : memref<?xf32>
-      %i0 = memref.load %indices[%c0] : memref<?xindex>
-      memref.store %i0, %x0s[%i] : memref<?xindex>
-      %i1 = memref.load %indices[%c1] : memref<?xindex>
-      memref.store %i1, %x1s[%i] : memref<?xindex>
-    }
-
-    // Release the resource for the indices.
-    memref.dealloc %indices : memref<?xindex>
-    return %x0s, %x1s, %vs : memref<?xindex>, memref<?xindex>, memref<?xf32>
+    %dim2lvl = memref.alloca(%c2) : memref<?xindex>
+    memref.store %c0, %dim2lvl[%c0] : memref<?xindex>
+    memref.store %c1, %dim2lvl[%c1] : memref<?xindex>
+    %n = memref.alloca() : memref<index>
+    %isSorted = func.call @getSparseTensorReaderRead0F32(%tensor, %xs, %vs, %n, %dim2lvl)
+      : (!TensorReader, memref<?xindex>, memref<?xf32>, memref<index>,
+         memref<?xindex>) -> (i1)
+    %nnz2 = memref.load %n[] : memref<index>
+    return %xs, %vs, %nnz2, %isSorted : memref<?xindex>, memref<?xf32>, index, i1
   }
 
   // Reads a COO tensor from the given file name and prints its content.
   func.func @readTensorFileAndDump(%fileName: !Filename) {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
     %tensor = call @createSparseTensorReader(%fileName)
       : (!Filename) -> (!TensorReader)
     %rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index
@@ -116,18 +121,23 @@
     func.call @copySparseTensorReaderDimSizes(%tensor, %dimSizes)
      : (!TensorReader, memref<?xindex>) -> ()
     call @dumpi(%dimSizes) : (memref<?xindex>) -> ()
-    %x0s, %x1s, %vs = call @readTensorFile(%tensor)
-      : (!TensorReader) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>)
-    call @dumpi(%x0s) : (memref<?xindex>) -> ()
-    call @dumpi(%x1s) : (memref<?xindex>) -> ()
+    %xs, %vs, %nnz2, %isSorted = call @readTensorFile(%tensor)
+      : (!TensorReader) -> (memref<?xindex>, memref<?xf32>, index, i1)
+    %x0s = memref.subview %xs[%c0][%nnz2][%c2]
+      : memref<?xindex> to memref<?xindex, strided<[?], offset: ?>>
+    %x1s = memref.subview %xs[%c1][%nnz2][%c2]
+      : memref<?xindex> to memref<?xindex, strided<[?], offset: ?>>
+    vector.print %isSorted : i1
+    vector.print %nnz2 : index
+    call @dumpi2(%x0s) : (memref<?xindex, strided<[?], offset: ?>>) -> ()
+    call @dumpi2(%x1s) : (memref<?xindex, strided<[?], offset: ?>>) -> ()
     call @dumpf(%vs) : (memref<?xf32>) -> ()
 
     // Release the resources.
     call @delSparseTensorReader(%tensor) : (!TensorReader) -> ()
     memref.dealloc %dimSizes : memref<?xindex>
-    memref.dealloc %x0s : memref<?xindex>
-    memref.dealloc %x1s : memref<?xindex>
+    memref.dealloc %xs : memref<?xindex>
     memref.dealloc %vs : memref<?xf32>
 
     return
@@ -184,6 +194,8 @@
   // CHECK: 17
   // CHECK: 0
   // CHECK: ( 4, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK: 1
+  // CHECK: 17
   // CHECK: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 )
   // CHECK: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255 )
   // CHECK: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17 )
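As a usage sketch (not part of the change): the C++ below shows how the new `readCOO` entry point can be driven directly. It assumes the pre-existing `SparseTensorReader` construction and `openFile()`/`readHeader()` sequence from `File.h`; the function name `loadMatrix` and the capacity bound are illustrative only.

#include <cassert>
#include <cstdint>
#include <vector>

#include "mlir/ExecutionEngine/SparseTensor/File.h"

using mlir::sparse_tensor::SparseTensorReader;

// Reads a rank-2 tensor into flat index/value buffers with an identity
// dim2lvl mapping. Returns whether the file contents were sorted.
bool loadMatrix(const char *filename, std::vector<uint64_t> &indices,
                std::vector<float> &values, uint64_t &nnz) {
  SparseTensorReader reader(filename);
  reader.openFile();
  reader.readHeader();
  const uint64_t rank = reader.getRank();
  assert(rank == 2 && "expected a matrix");
  // Symmetric inputs are expanded in full, so budget up to twice the
  // number of stored entries.
  const uint64_t maxNnz = 2 * reader.getNNZ();
  indices.resize(rank * maxNnz);
  values.resize(maxNnz);
  const uint64_t dim2lvl[2] = {0, 1}; // identity dim-to-level permutation
  // readCOO fills the buffers, stores the actual nnz, closes the file,
  // and reports whether the elements appeared in sorted order.
  return reader.readCOO(indices.data(), values.data(), &nnz, dim2lvl, maxNnz);
}

The factor of two in `maxNnz` mirrors the symmetric expansion performed in `readCOOLoop`, whose asserts check that the duplicated off-diagonal elements still fit in the buffers.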