diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -92,6 +92,11 @@
 };
 
 // This x-macro includes all `V` types.
+// TODO: We currently split out the non-variadic version from the variadic
+// version. Using ##__VA_ARGS__ to avoid the split gives
+//   warning: token pasting of ',' and __VA_ARGS__ is a GNU extension
+//   [-Wgnu-zero-variadic-macro-arguments]
+// and __VA_OPT__(, ) __VA_ARGS__ requires C++20.
 #define MLIR_SPARSETENSOR_FOREVERY_V(DO) \
   DO(F64, double) \
   DO(F32, float) \
@@ -104,6 +109,27 @@
   DO(C64, complex64) \
   DO(C32, complex32)
 
+// This x-macro includes all `V` types and supports variadic arguments.
+#define MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, ...) \
+  DO(F64, double, __VA_ARGS__) \
+  DO(F32, float, __VA_ARGS__) \
+  DO(F16, f16, __VA_ARGS__) \
+  DO(BF16, bf16, __VA_ARGS__) \
+  DO(I64, int64_t, __VA_ARGS__) \
+  DO(I32, int32_t, __VA_ARGS__) \
+  DO(I16, int16_t, __VA_ARGS__) \
+  DO(I8, int8_t, __VA_ARGS__) \
+  DO(C64, complex64, __VA_ARGS__) \
+  DO(C32, complex32, __VA_ARGS__)
+
+// This x-macro calls its argument on every pair of overhead and `V` types.
+#define MLIR_SPARSETENSOR_FOREVERY_V_O(DO) \
+  MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 64, uint64_t) \
+  MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 32, uint32_t) \
+  MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 16, uint16_t) \
+  MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 8, uint8_t) \
+  MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 0, index_type)
+
 constexpr bool isFloatingPrimaryType(PrimaryType valTy) {
   return PrimaryType::kF64 <= valTy && valTy <= PrimaryType::kBF16;
 }
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -249,6 +249,14 @@
     return tensor;
   }
 
+  /// Reads the COO tensor from the file, stores the coordinates and values to
+  /// the given buffers, and returns a boolean value to indicate whether the
+  /// COO elements are sorted.
+  /// Precondition: the buffers should have enough space to hold the elements.
+  template <typename C, typename V>
+  bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
+                     C *lvlCoordinates, V *values);
+
 private:
   /// Attempts to read a line from the file. Is private because there's
   /// no reason for client code to call it.
@@ -287,6 +295,13 @@
   void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
                    SparseTensorCOO<V> *lvlCOO);
 
+  /// The internal implementation of `readToBuffers`. We template over
+  /// `IsPattern` in order to perform LICM without needing to duplicate the
+  /// source code.
+  template <typename C, typename V, bool IsPattern>
+  bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
+                         C *lvlCoordinates, V *values);
+
   /// Reads the MME header of a general sparse matrix of type real.
   void readMMEHeader();
@@ -351,6 +366,69 @@
   }
 }
 
+template <typename C, typename V>
+bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
+                                       const uint64_t *dim2lvl,
+                                       C *lvlCoordinates, V *values) {
+  assert(isValid() && "Attempt to readToBuffers() before readHeader()");
+  const uint64_t dimRank = getRank();
+  assert(lvlRank == dimRank && "Rank mismatch");
+  detail::PermutationRef d2l(dimRank, dim2lvl);
+  // Do some manual LICM, to avoid assertions in the for-loop.
+  bool isSorted =
+      isPattern()
+          ? readToBuffersLoop<C, V, true>(lvlRank, d2l, lvlCoordinates, values)
+          : readToBuffersLoop<C, V, false>(lvlRank, d2l, lvlCoordinates,
+                                           values);
+
+  // Close the file and return isSorted.
+  closeFile();
+  return isSorted;
+}
+
+template <typename C, typename V, bool IsPattern>
+bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
+                                           detail::PermutationRef dim2lvl,
+                                           C *lvlCoordinates, V *values) {
+  const uint64_t dimRank = getRank();
+  const uint64_t nse = getNNZ();
+  std::vector<uint64_t> dimCoords(dimRank);
+  // Read the first element with isSorted=false as a way to avoid accessing its
+  // previous element.
+  bool isSorted = false;
+  char *linePtr;
+  // We inline `readCOOElement` here in order to avoid redundant assertions,
+  // since they're guaranteed by the call to `isValid()` and the construction
+  // of `dimCoords` above.
+  auto readElement = [&]() {
+    linePtr = readCOOIndices(dimCoords.data());
+    dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates);
+    *values = detail::readCOOValue<V, IsPattern>(&linePtr);
+    if (isSorted) {
+      // Note that isSorted was set to false while reading the first element,
+      // to guarantee the safeness of using prevLvlCoords.
+      C *prevLvlCoords = lvlCoordinates - lvlRank;
+      // TODO: define a new CoordsLT which is like ElementLT but doesn't have
+      // the V parameter, and use it here.
+      for (uint64_t l = 0; l < lvlRank; ++l) {
+        if (prevLvlCoords[l] != lvlCoordinates[l]) {
+          if (prevLvlCoords[l] > lvlCoordinates[l])
+            isSorted = false;
+          break;
+        }
+      }
+    }
+    lvlCoordinates += lvlRank;
+    ++values;
+  };
+  readElement();
+  isSorted = true;
+  for (uint64_t n = 1; n < nse; ++n)
+    readElement();
+
+  return isSorted;
+}
+
 /// Writes the sparse tensor to `filename` in extended FROSTT format.
 template <typename V>
 inline void writeExtFROSTT(const SparseTensorCOO<V> &coo,
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
@@ -283,6 +283,17 @@
 MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT)
 #undef DECL_GETNEXT
 
+/// Reads the sparse tensor, stores the coordinates and values into the given
+/// memrefs, and returns a boolean value to indicate whether the COO elements
+/// are sorted.
+#define DECL_GETNEXT(VNAME, V, CNAME, C) \
+  MLIR_CRUNNERUTILS_EXPORT bool \
+      _mlir_ciface_getSparseTensorReaderRead##CNAME##VNAME( \
+          void *p, StridedMemRefType<index_type, 1> *dim2lvlRef, \
+          StridedMemRefType<C, 1> *iref, StridedMemRefType<V, 1> *vref);
+MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT)
+#undef DECL_GETNEXT
+
 using SparseTensorWriter = std::ostream;
 
 /// Creates a SparseTensorWriter for outputing a sparse tensor to a file with
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -631,6 +631,33 @@
 MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT)
 #undef IMPL_GETNEXT
 
+#define IMPL_GETNEXT(VNAME, V, CNAME, C) \
+  bool _mlir_ciface_getSparseTensorReaderRead##CNAME##VNAME( \
+      void *p, StridedMemRefType<index_type, 1> *dim2lvlRef, \
+      StridedMemRefType<C, 1> *cref, StridedMemRefType<V, 1> *vref) { \
+    assert(p); \
+    auto &reader = *static_cast<SparseTensorReader *>(p); \
+    ASSERT_NO_STRIDE(cref); \
+    ASSERT_NO_STRIDE(vref); \
+    ASSERT_NO_STRIDE(dim2lvlRef); \
+    const uint64_t cSize = MEMREF_GET_USIZE(cref); \
+    const uint64_t vSize = MEMREF_GET_USIZE(vref); \
+    const uint64_t lvlRank = reader.getRank(); \
+    assert(vSize * lvlRank <= cSize); \
+    assert(vSize >= reader.getNNZ() && "Not enough space in buffers"); \
+    ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \
+    (void)cSize; \
+    (void)vSize; \
+    (void)lvlRank; \
+    C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \
+    V *values = MEMREF_GET_PAYLOAD(vref); \
+    index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \
+    return reader.readToBuffers<C, V>(lvlRank, dim2lvl, lvlCoordinates, \
+                                      values); \
+  }
+MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT)
+#undef IMPL_GETNEXT
+
 void *_mlir_ciface_newSparseTensorFromReader(
     void *p, StridedMemRefType<index_type, 1> *lvlSizesRef,
     StridedMemRefType<DimLevelType, 1> *lvlTypesRef,
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
@@ -43,6 +43,9 @@
   func.func private @getSparseTensorReaderIsSymmetric(!TensorReader) -> (i1)
   func.func private @copySparseTensorReaderDimSizes(!TensorReader,
     memref<?xindex>) -> () attributes { llvm.emit_c_interface }
+  func.func private @getSparseTensorReaderRead0F32(!TensorReader,
+    memref<?xindex>, memref<?xindex>, memref<?xf32>)
+    -> (i1) attributes { llvm.emit_c_interface }
   func.func private @getSparseTensorReaderNextF32(!TensorReader,
     memref<?xindex>, memref<f32>) -> ()
     attributes { llvm.emit_c_interface }
@@ -60,6 +63,14 @@
     return
   }
 
+  func.func @dumpi2(%arg0: memref<?xindex, strided<[?], offset: ?>>) {
+    %c0 = arith.constant 0 : index
+    %v = vector.transfer_read %arg0[%c0], %c0 :
+      memref<?xindex, strided<[?], offset: ?>>, vector<17xindex>
+    vector.print %v : vector<17xindex>
+    return
+  }
+
   func.func @dumpf(%arg0: memref<?xf32>) {
     %c0 = arith.constant 0 : index
     %d0 = arith.constant 0.0 : f32
@@ -70,39 +81,31 @@
   // Returns the indices and values of the tensor.
   func.func @readTensorFile(%tensor: !TensorReader)
-    -> (memref<?xindex>, memref<?xindex>, memref<?xf32>) {
+    -> (memref<?xindex>, memref<?xf32>, i1) {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
     %rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index
     %nnz = call @getSparseTensorReaderNNZ(%tensor) : (!TensorReader) -> index
 
     // Assume rank == 2.
-    %x0s = memref.alloc(%nnz) : memref<?xindex>
-    %x1s = memref.alloc(%nnz) : memref<?xindex>
+    %isize = arith.muli %c2, %nnz : index
+    %xs = memref.alloc(%isize) : memref<?xindex>
     %vs = memref.alloc(%nnz) : memref<?xf32>
-    %indices = memref.alloc(%rank) : memref<?xindex>
-    %value = memref.alloca() : memref<f32>
-    scf.for %i = %c0 to %nnz step %c1 {
-      func.call @getSparseTensorReaderNextF32(%tensor, %indices, %value)
-        : (!TensorReader, memref<?xindex>, memref<f32>) -> ()
-      // TODO: can we use memref.subview to avoid the need for the %value
-      // buffer?
-      %v = memref.load %value[] : memref<f32>
-      memref.store %v, %vs[%i] : memref<?xf32>
-      %i0 = memref.load %indices[%c0] : memref<?xindex>
-      memref.store %i0, %x0s[%i] : memref<?xindex>
-      %i1 = memref.load %indices[%c1] : memref<?xindex>
-      memref.store %i1, %x1s[%i] : memref<?xindex>
-    }
-
-    // Release the resource for the indices.
-    memref.dealloc %indices : memref<?xindex>
-    return %x0s, %x1s, %vs : memref<?xindex>, memref<?xindex>, memref<?xf32>
+    %dim2lvl = memref.alloca(%c2) : memref<?xindex>
+    memref.store %c0, %dim2lvl[%c0] : memref<?xindex>
+    memref.store %c1, %dim2lvl[%c1] : memref<?xindex>
+    %isSorted = func.call @getSparseTensorReaderRead0F32(%tensor, %dim2lvl, %xs, %vs)
+      : (!TensorReader, memref<?xindex>, memref<?xindex>, memref<?xf32>) -> (i1)
+    return %xs, %vs, %isSorted : memref<?xindex>, memref<?xf32>, i1
   }
 
   // Reads a COO tensor from the given file name and prints its content.
   func.func @readTensorFileAndDump(%fileName: !Filename) {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
     %tensor = call @createSparseTensorReader(%fileName)
       : (!Filename) -> (!TensorReader)
     %rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index
@@ -116,18 +119,22 @@
     func.call @copySparseTensorReaderDimSizes(%tensor, %dimSizes)
       : (!TensorReader, memref<?xindex>) -> ()
     call @dumpi(%dimSizes) : (memref<?xindex>) -> ()
-    %x0s, %x1s, %vs = call @readTensorFile(%tensor)
-      : (!TensorReader) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>)
-    call @dumpi(%x0s) : (memref<?xindex>) -> ()
-    call @dumpi(%x1s) : (memref<?xindex>) -> ()
+    %xs, %vs, %isSorted = call @readTensorFile(%tensor)
+      : (!TensorReader) -> (memref<?xindex>, memref<?xf32>, i1)
+    %x0s = memref.subview %xs[%c0][%nnz][%c2]
+      : memref<?xindex> to memref<?xindex, strided<[?], offset: ?>>
+    %x1s = memref.subview %xs[%c1][%nnz][%c2]
+      : memref<?xindex> to memref<?xindex, strided<[?], offset: ?>>
+    vector.print %isSorted : i1
+    call @dumpi2(%x0s) : (memref<?xindex, strided<[?], offset: ?>>) -> ()
+    call @dumpi2(%x1s) : (memref<?xindex, strided<[?], offset: ?>>) -> ()
     call @dumpf(%vs) : (memref<?xf32>) -> ()
 
     // Release the resources.
     call @delSparseTensorReader(%tensor) : (!TensorReader) -> ()
     memref.dealloc %dimSizes : memref<?xindex>
-    memref.dealloc %x0s : memref<?xindex>
-    memref.dealloc %x1s : memref<?xindex>
+    memref.dealloc %xs : memref<?xindex>
     memref.dealloc %vs : memref<?xf32>
 
     return
@@ -184,6 +191,7 @@
     // CHECK: 17
     // CHECK: 0
    // CHECK: ( 4, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK: 1
     // CHECK: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 )
     // CHECK: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255 )
     // CHECK: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17 )
@@ -215,4 +223,4 @@
     return
   }
-}
+}
\ No newline at end of file
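
Note (illustrative, not part of the patch): to make the x-macro machinery concrete,
MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) instantiates DECL_GETNEXT once per
(overhead, value) type pair. The (CNAME=0, C=index_type, VNAME=F32, V=float)
instance is the _mlir_ciface_getSparseTensorReaderRead0F32 symbol called from the
test above, and expands to roughly:

  MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getSparseTensorReaderRead0F32(
      void *p, StridedMemRefType<index_type, 1> *dim2lvlRef,
      StridedMemRefType<index_type, 1> *iref, StridedMemRefType<float, 1> *vref);

On the MLIR side, memref<?xindex> and memref<?xf32> arguments lower to exactly
these StridedMemRefType descriptors under llvm.emit_c_interface.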
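
Note (illustrative, not part of the patch): a minimal C++ sketch of how the new
readToBuffers entry point could be driven directly, mirroring what the runtime
wrapper above does. It assumes the reader is constructed, opened, and its header
parsed the same way the existing createSparseTensorReader entry point does; the
file name, the identity dim2lvl mapping, and the helper name are made up for the
example, and buffer sizes follow the documented precondition (lvlRank coordinates
plus one value per stored element).

  #include "mlir/ExecutionEngine/SparseTensor/File.h"
  #include <cstdint>
  #include <vector>

  using namespace mlir::sparse_tensor;

  // Hypothetical helper: read a rank-2 f32 matrix straight into flat buffers.
  static bool readMatrixToBuffers(const char *filename) {
    SparseTensorReader reader(filename);
    reader.openFile();
    reader.readHeader(); // required before readToBuffers()
    const uint64_t rank = reader.getRank(); // assumed to be 2 here
    const uint64_t nse = reader.getNNZ();
    const uint64_t dim2lvl[] = {0, 1}; // identity dim-to-level mapping
    std::vector<uint64_t> coords(nse * rank); // rank coordinates per element
    std::vector<float> values(nse);           // one value per element
    // Returns whether the elements appeared in sorted order in the file.
    return reader.readToBuffers<uint64_t, float>(rank, dim2lvl, coords.data(),
                                                 values.data());
  }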