diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -35,6 +35,15 @@ #include #include +// Silence 'warning C4002: too many arguments for function-like macro +// invocation' +// as MSVC handles ##__VA_ARGS__ differently than gcc/clang. + +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +#pragma warning(disable : 4002) +#endif + namespace mlir { namespace sparse_tensor { @@ -92,17 +101,25 @@ }; // This x-macro includes all `V` types. -#define MLIR_SPARSETENSOR_FOREVERY_V(DO) \ - DO(F64, double) \ - DO(F32, float) \ - DO(F16, f16) \ - DO(BF16, bf16) \ - DO(I64, int64_t) \ - DO(I32, int32_t) \ - DO(I16, int16_t) \ - DO(I8, int8_t) \ - DO(C64, complex64) \ - DO(C32, complex32) +#define MLIR_SPARSETENSOR_FOREVERY_V(DO, ...) \ + DO(F64, double, ##__VA_ARGS__) \ + DO(F32, float, ##__VA_ARGS__) \ + DO(F16, f16, ##__VA_ARGS__) \ + DO(BF16, bf16, ##__VA_ARGS__) \ + DO(I64, int64_t, ##__VA_ARGS__) \ + DO(I32, int32_t, ##__VA_ARGS__) \ + DO(I16, int16_t, ##__VA_ARGS__) \ + DO(I8, int8_t, ##__VA_ARGS__) \ + DO(C64, complex64, ##__VA_ARGS__) \ + DO(C32, complex32, ##__VA_ARGS__) + +// This x-macro calls its argument on every pair of overhead and `V` types. +#define MLIR_SPARSETENSOR_FOREVERY_V_O(DO) \ + MLIR_SPARSETENSOR_FOREVERY_V(DO, 64, uint64_t) \ + MLIR_SPARSETENSOR_FOREVERY_V(DO, 32, uint32_t) \ + MLIR_SPARSETENSOR_FOREVERY_V(DO, 16, uint16_t) \ + MLIR_SPARSETENSOR_FOREVERY_V(DO, 8, uint8_t) \ + MLIR_SPARSETENSOR_FOREVERY_V(DO, 0, index_type) constexpr bool isFloatingPrimaryType(PrimaryType valTy) { return PrimaryType::kF64 <= valTy && valTy <= PrimaryType::kBF16; @@ -357,4 +374,9 @@ } // namespace sparse_tensor } // namespace mlir +// Restore warning status. 
+#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif + #endif // MLIR_DIALECT_SPARSETENSOR_IR_ENUMS_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -249,6 +249,14 @@ return tensor; } + /// Reads the COO tensor, stores the indices, values and actual nnz to the + /// given buffers. The maximum number of nnz that can be held by the buffers + /// is `maxNnz`. Returns a boolean value to indicate whether the COO elements + /// are sorted. + template + bool readCOO(I *indices, V *values, uint64_t *actualNnz, + const uint64_t *dim2lvl, uint64_t maxNnz); + private: /// Attempts to read a line from the file. Is private because there's /// no reason for client code to call it. @@ -287,6 +295,13 @@ void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl, SparseTensorCOO *lvlCOO); + /// The internal implementation of `readCOO`. We template over + /// `IsPattern` and `IsSymmetric` in order to perform LICM without + /// needing to duplicate the source code. + template + bool readCOOLoop(I *indices, V *values, uint64_t *actualNnz, + const uint64_t *dim2lvl, uint64_t maxNnz); + /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -366,6 +381,91 @@ } } +template +bool SparseTensorReader::readCOO(I *indices, V *values, uint64_t *actualNnz, + const uint64_t *dim2lvl, uint64_t maxNnz) { + assert(isValid() && "Attempt to readCOO() before readHeader()"); + // Do some manual LICM, to avoid assertions in the for-loop. 
+ const bool IsPattern = isPattern(); + const bool IsSymmetric = (isSymmetric() && getRank() == 2); + bool isSorted; + if (IsPattern && IsSymmetric) + isSorted = readCOOLoop(indices, values, actualNnz, + dim2lvl, maxNnz); + else if (IsPattern) + isSorted = readCOOLoop(indices, values, actualNnz, + dim2lvl, maxNnz); + else if (IsSymmetric) + isSorted = readCOOLoop(indices, values, actualNnz, + dim2lvl, maxNnz); + else + isSorted = readCOOLoop(indices, values, actualNnz, + dim2lvl, maxNnz); + // Close the file and return isSorted. + closeFile(); + return isSorted; +} + +template +bool SparseTensorReader::readCOOLoop(I *indices, V *values, uint64_t *actualNnz, + const uint64_t *dim2lvl, uint64_t maxNnz) { + const uint64_t dimRank = getRank(); + const uint64_t nnz = getNNZ(); + detail::PermutationRef d2l(dimRank, dim2lvl); + std::vector dimInd(dimRank); + // Assume unsorted for symmetric. + bool isSorted = !IsSymmetric; + // We inline `readCOOElement` here in order to avoid redundant + // assertions, since they're guaranteed by the call to `isValid()` + // and the construction of `dimInd` above. + char *linePtr; + uint64_t n = 0; + auto advanceOneElement = [&]() { + indices += dimRank; + ++values; + ++n; + }; + auto getOneElement = [&]() { + linePtr = readCOOIndices(dimInd.data()); + d2l.pushforward(dimRank, dimInd.data(), indices); + *values = detail::readCOOValue(&linePtr); + }; + + for (uint64_t k = 0; k < nnz; ++k) { + assert(n < maxNnz); + getOneElement(); + // The first element has no predecessor to compare against. + if (isSorted && k > 0) { + I *prevIndices = indices - dimRank; + for (uint64_t d = 0; d < dimRank; ++d) { + if (prevIndices[d] != indices[d]) { + isSorted = (prevIndices[d] < indices[d]); + break; + } + } + } + + // We currently chose to deal with symmetric matrices by fully + // constructing them. In the future, we may want to make symmetry + // implicit for storage reasons. 
+ if constexpr (IsSymmetric) + if (indices[0] != indices[1]) { + assert(n + 1 < maxNnz); + I *prevIndices = indices; + V *prevValues = values; + advanceOneElement(); + indices[0] = prevIndices[1]; + indices[1] = prevIndices[0]; + values[0] = prevValues[0]; + } + + advanceOneElement(); + } + + *actualNnz = n; + return isSorted; +} + /// Writes the sparse tensor to `filename` in extended FROSTT format. template inline void writeExtFROSTT(const SparseTensorCOO &coo, diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -283,6 +283,19 @@ MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT) #undef DECL_GETNEXT +/// Reads the sparse tensor, stores the indices, values and the actual nnz to +/// the given memrefs. Returns a boolean value to indicate whether the COO +/// elements are sorted. +#define DECL_GETNEXT(VNAME, V, INAME, I) \ + MLIR_CRUNNERUTILS_EXPORT bool \ + _mlir_ciface_getSparseTensorReaderRead##INAME##VNAME( \ + void *p, StridedMemRefType *iref, \ + StridedMemRefType *vref, \ + StridedMemRefType *nref, \ + StridedMemRefType *dim2lvlRef); +MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT) +#undef DECL_GETNEXT + using SparseTensorWriter = std::ostream; /// Creates a SparseTensorWriter for outputing a sparse tensor to a file with diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -513,12 +513,12 @@ assert(lvlCOO &&vref); \ ASSERT_NO_STRIDE(dimIndRef); \ ASSERT_NO_STRIDE(dim2lvlRef); \ - const uint64_t rank = MEMREF_GET_USIZE(dimIndRef); \ - ASSERT_USIZE_EQ(dim2lvlRef, rank); \ + const uint64_t dimRank = MEMREF_GET_USIZE(dimIndRef); \ + ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \ const index_type *dimInd = 
MEMREF_GET_PAYLOAD(dimIndRef); \ const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ - std::vector lvlInd(rank); \ - for (uint64_t d = 0; d < rank; ++d) \ + std::vector lvlInd(dimRank); \ + for (uint64_t d = 0; d < dimRank; ++d) \ lvlInd[dim2lvl[d]] = dimInd[d]; \ V *value = MEMREF_GET_PAYLOAD(vref); \ static_cast *>(lvlCOO)->add(lvlInd, *value); \ @@ -631,6 +631,35 @@ MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT) #undef IMPL_GETNEXT +#define IMPL_GETNEXT(VNAME, V, INAME, I) \ + bool _mlir_ciface_getSparseTensorReaderRead##INAME##VNAME( \ + void *p, StridedMemRefType *iref, StridedMemRefType *vref, \ + StridedMemRefType *nref, \ + StridedMemRefType *dim2lvlRef) { \ + assert(p &&iref &&vref &&dim2lvlRef &&nref); \ + auto &reader = *static_cast(p); \ + ASSERT_NO_STRIDE(iref); \ + ASSERT_NO_STRIDE(vref); \ + ASSERT_NO_STRIDE(dim2lvlRef); \ + const uint64_t is = MEMREF_GET_USIZE(iref); \ + const uint64_t vs = MEMREF_GET_USIZE(vref); \ + const uint64_t rank = reader.getRank(); \ + assert(vs *rank == is); \ + const uint64_t ps = MEMREF_GET_USIZE(dim2lvlRef); \ + assert(ps == rank); \ + (void)is; \ + (void)vs; \ + (void)rank; \ + (void)ps; \ + I *indices = MEMREF_GET_PAYLOAD(iref); \ + V *values = MEMREF_GET_PAYLOAD(vref); \ + index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \ + index_type *n = MEMREF_GET_PAYLOAD(nref); \ + return reader.readCOO(indices, values, n, dim2lvl, vs); \ + } +MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT) +#undef IMPL_GETNEXT + void *_mlir_ciface_newSparseTensorFromReader( void *p, StridedMemRefType *lvlSizesRef, StridedMemRefType *lvlTypesRef, diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir @@ -43,6 +43,9 @@ func.func private @getSparseTensorReaderIsSymmetric(!TensorReader) -> (i1) func.func 
private @copySparseTensorReaderDimSizes(!TensorReader, memref) -> () attributes { llvm.emit_c_interface } + func.func private @getSparseTensorReaderRead0F32(!TensorReader, + memref, memref, memref, memref) + -> (i1) attributes { llvm.emit_c_interface } func.func private @getSparseTensorReaderNextF32(!TensorReader, memref, memref) -> () attributes { llvm.emit_c_interface } @@ -60,6 +63,14 @@ return } + func.func @dumpi2(%arg0: memref>) { + %c0 = arith.constant 0 : index + %v = vector.transfer_read %arg0[%c0], %c0 : + memref>, vector<17xindex> + vector.print %v : vector<17xindex> + return + } + func.func @dumpf(%arg0: memref) { %c0 = arith.constant 0 : index %d0 = arith.constant 0.0 : f32 @@ -70,39 +81,33 @@ // Returns the indices and values of the tensor. func.func @readTensorFile(%tensor: !TensorReader) - -> (memref, memref, memref) { + -> (memref, memref, index, i1) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index %rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index %nnz = call @getSparseTensorReaderNNZ(%tensor) : (!TensorReader) -> index // Assume rank == 2. - %x0s = memref.alloc(%nnz) : memref - %x1s = memref.alloc(%nnz) : memref + %isize = arith.muli %c2, %nnz : index + %xs = memref.alloc(%isize) : memref %vs = memref.alloc(%nnz) : memref - %indices = memref.alloc(%rank) : memref - %value = memref.alloca() : memref - scf.for %i = %c0 to %nnz step %c1 { - func.call @getSparseTensorReaderNextF32(%tensor, %indices, %value) - : (!TensorReader, memref, memref) -> () - // TODO: can we use memref.subview to avoid the need for the %value - // buffer? - %v = memref.load %value[] : memref - memref.store %v, %vs[%i] : memref - %i0 = memref.load %indices[%c0] : memref - memref.store %i0, %x0s[%i] : memref - %i1 = memref.load %indices[%c1] : memref - memref.store %i1, %x1s[%i] : memref - } - - // Release the resource for the indices. 
- memref.dealloc %indices : memref - return %x0s, %x1s, %vs : memref, memref, memref + %dim2lvl = memref.alloca(%c2) : memref + memref.store %c0, %dim2lvl[%c0] : memref + memref.store %c1, %dim2lvl[%c1] : memref + %n = memref.alloca() : memref + %isSorted = func.call @getSparseTensorReaderRead0F32(%tensor, %xs, %vs, %n, %dim2lvl) + : (!TensorReader, memref, memref, memref, memref) -> (i1) + %nnz2 = memref.load %n[] : memref + return %xs, %vs, %nnz2, %isSorted : memref, memref, index, i1 } // Reads a COO tensor from the given file name and prints its content. func.func @readTensorFileAndDump(%fileName: !Filename) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index %tensor = call @createSparseTensorReader(%fileName) : (!Filename) -> (!TensorReader) %rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index @@ -116,18 +121,23 @@ func.call @copySparseTensorReaderDimSizes(%tensor, %dimSizes) : (!TensorReader, memref) -> () call @dumpi(%dimSizes) : (memref) -> () - %x0s, %x1s, %vs = call @readTensorFile(%tensor) - : (!TensorReader) -> (memref, memref, memref) - call @dumpi(%x0s) : (memref) -> () - call @dumpi(%x1s) : (memref) -> () + %xs, %vs, %nnz2, %isSorted = call @readTensorFile(%tensor) + : (!TensorReader) -> (memref, memref, index, i1) + %x0s = memref.subview %xs[%c0][%nnz2][%c2] + : memref to memref> + %x1s = memref.subview %xs[%c1][%nnz2][%c2] + : memref to memref> + vector.print %isSorted : i1 + vector.print %nnz2 : index + call @dumpi2(%x0s) : (memref>) -> () + call @dumpi2(%x1s) : (memref>) -> () call @dumpf(%vs) : (memref) -> () // Release the resources. 
 call @delSparseTensorReader(%tensor) : (!TensorReader) -> () memref.dealloc %dimSizes : memref - memref.dealloc %x0s : memref - memref.dealloc %x1s : memref + memref.dealloc %xs : memref memref.dealloc %vs : memref return @@ -184,6 +194,8 @@ // CHECK: 17 // CHECK: 0 // CHECK: ( 4, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK: 1 + // CHECK: 17 // CHECK: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 ) // CHECK: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255 ) // CHECK: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17 )