diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -53,6 +53,9 @@
   assert(d < getRank() && "Dimension index is out of bounds");
 #define ASSERT_COMPRESSED_DIM(d)                                               \
   assert(isCompressedDim(d) && "Dimension is not compressed");
+#define ASSERT_COMPRESSED_OR_SINGLETON_DIM(d)                                  \
+  assert((isCompressedDim(d) || isSingletonDim(d)) &&                          \
+         "Dimension is neither compressed nor singleton");
 #define ASSERT_DENSE_DIM(d) assert(isDenseDim(d) && "Dimension is not dense");
 
 /// Abstract base class for `SparseTensorStorage`. This class
@@ -145,6 +148,7 @@
   virtual void getIndices(std::vector<I> **, uint64_t);
   FOREVERY_FIXED_O(DECL_GETINDICES)
 #undef DECL_GETINDICES
+  virtual uint64_t getIndex(uint64_t d, uint64_t pos) const = 0;
 
   /// Primary storage.
 #define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
@@ -254,6 +258,12 @@
   }
 
   void getValues(std::vector<V> **out) final { *out = &values; }
+  uint64_t getIndex(uint64_t d, uint64_t pos) const final {
+    ASSERT_COMPRESSED_OR_SINGLETON_DIM(d);
+    assert(pos < indices[d].size() && "Index position is out of bounds");
+    return indices[d][pos]; // Converts the stored `I` into `uint64_t`.
+  }
+
   /// Partially specialize lexicographical insertions based on template types.
   void lexInsert(const uint64_t *cursor, V val) final {
     // First, wrap up pending insertion path.
@@ -376,7 +386,7 @@
   /// does not check that `i` is semantically valid (i.e., in bounds
   /// for `dimSizes[d]` and not elsewhere occurring in the same segment).
   void writeIndex(uint64_t d, uint64_t pos, uint64_t i) {
-    ASSERT_COMPRESSED_DIM(d);
+    ASSERT_COMPRESSED_OR_SINGLETON_DIM(d);
     // Subscript assignment to `std::vector` requires that the `pos`-th
     // entry has been initialized; thus we must be sure to check `size()`
     // here, instead of `capacity()` as would be ideal.
@@ -397,8 +407,11 @@
   uint64_t assembledSize(uint64_t parentSz, uint64_t d) const {
     if (isCompressedDim(d))
       return pointers[d][parentSz];
-    // else if dense:
-    return parentSz * getDimSizes()[d];
+    if (isSingletonDim(d))
+      return parentSz; // New size is the same as the parent.
+    if (isDenseDim(d))
+      return parentSz * getDimSizes()[d];
+    MLIR_SPARSETENSOR_FATAL("unsupported dimension level type");
   }
 
   /// Initializes sparse tensor storage scheme from a memory-resident sparse
@@ -446,7 +459,7 @@
     if (isCompressedDim(d)) {
       appendPointer(d, indices[d].size(), count);
     } else if (isSingletonDim(d)) {
-      return;
+      return; // Nothing to finalize.
     } else { // Dense dimension.
       ASSERT_DENSE_DIM(d);
       const uint64_t sz = getDimSizes()[d];
@@ -475,8 +488,8 @@
 
   /// Continues a single insertion path, outer to inner.
   void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) {
-    ASSERT_VALID_DIM(diff);
     const uint64_t rank = getRank();
+    assert(diff <= rank && "Dimension-diff is out of bounds");
     for (uint64_t d = diff; d < rank; ++d) {
       const uint64_t i = cursor[d];
       appendIndex(d, top, i);
@@ -509,6 +522,7 @@
   std::vector<uint64_t> idx; // index cursor for lexicographic insertion.
 };
 
+#undef ASSERT_COMPRESSED_OR_SINGLETON_DIM
 #undef ASSERT_COMPRESSED_DIM
 #undef ASSERT_VALID_DIM
 
@@ -637,7 +651,8 @@
         forallElements(yield, pos, d + 1);
       }
     } else if (src.isSingletonDim(d)) {
-      MLIR_SPARSETENSOR_FATAL("unsupported dimension level type");
+      this->cursor[this->reord[d]] = src.getIndex(d, parentPos);
+      forallElements(yield, parentPos, d + 1);
     } else { // Dense dimension.
       assert(src.isDenseDim(d)); // TODO: reuse the ASSERT_DENSE_DIM message
       const uint64_t sz = src.getDimSizes()[d];
@@ -740,7 +755,6 @@
 #endif
     return new SparseTensorStorage<P, I, V>(coosz, perm, sparsity, coo);
   }
-  // else
   std::vector<uint64_t> permsz(rank);
   for (uint64_t r = 0; r < rank; ++r) {
     assert(shape[r] > 0 && "Dimension size zero has trivial storage");
@@ -848,7 +862,7 @@
     // That is, in the yieldPos loop we need random-access assignment
     // to `indices[r]`; however, `std::vector`'s subscript-assignment
     // only allows assigning to already-initialized positions.
-    if (isCompressedDim(r))
+    if (isCompressedDim(r) || isSingletonDim(r))
       indices[r].resize(parentSz, 0);
   }
   values.resize(parentSz, 0); // Both allocate and zero-initialize.
@@ -872,6 +886,7 @@
       writeIndex(r, currentPos, ind[r]);
       parentPos = currentPos;
     } else if (isSingletonDim(r)) {
+      writeIndex(r, parentPos, ind[r]);
       // the new parentPos equals the old parentPos.
     } else { // Dense dimension.
       ASSERT_DENSE_DIM(r);
@@ -899,10 +914,13 @@
       }
       pointers[r][0] = 0;
     }
+    // Both dense and singleton are no-ops for the finalizeYieldPos loop.
     parentSz = assembledSize(parentSz, r);
   }
 }
 
+#undef ASSERT_DENSE_DIM
+
 } // namespace sparse_tensor
 } // namespace mlir
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -449,17 +449,18 @@
     ArrayRef<SparseTensorEncodingAttr::DimLevelType> dimTypes) {
   bool alreadyCompressed = false;
   for (uint64_t rank = dimTypes.size(), r = 0; r < rank; r++) {
-    switch (dimTypes[r]) {
-    case SparseTensorEncodingAttr::DimLevelType::Compressed:
+    const DimLevelType dlt = dimLevelTypeEncoding(dimTypes[r]);
+    if (isCompressedDLT(dlt)) {
       if (alreadyCompressed)
         return false; // Multiple compressed dimensions not yet supported.
       alreadyCompressed = true;
-      break;
-    case SparseTensorEncodingAttr::DimLevelType::Dense:
+    } else if (isDenseDLT(dlt)) {
       if (alreadyCompressed)
         return false; // Dense after Compressed not yet supported.
-      break;
-    default: // TODO: investigate
+    } else if (isSingletonDLT(dlt)) {
+      // Direct conversion doesn't have any particular problems with
+      // singleton after compressed.
+    } else { // TODO: investigate
       return false;
     }
   }
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
@@ -31,28 +31,29 @@
                                  const std::vector<DimLevelType> &sparsity)
     : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) {
   assert(dimSizes.size() == dimTypes.size() && "Rank mismatch");
-  bool uncompressed = true;
-  (void)uncompressed;
+  bool alreadyCompressed = false;
+  (void)alreadyCompressed;
   uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`.
   for (uint64_t rank = getRank(), r = 0; r < rank; r++) {
-    switch (dimTypes[r]) {
-    case DimLevelType::kCompressed:
-      assert(uncompressed &&
-             "Multiple compressed layers not currently supported");
-      uncompressed = false;
+    const DimLevelType dlt = sparsity[r];
+    if (isCompressedDLT(dlt)) {
+      if (alreadyCompressed)
+        MLIR_SPARSETENSOR_FATAL(
+            "Multiple compressed layers not currently supported");
+      alreadyCompressed = true;
       nnz[r].resize(sz, 0); // Both allocate and zero-initialize.
-      break;
-    case DimLevelType::kDense:
-      assert(uncompressed && "Dense after compressed not currently supported");
-      break;
-    case DimLevelType::kSingleton:
+    } else if (isDenseDLT(dlt)) {
+      if (alreadyCompressed)
+        MLIR_SPARSETENSOR_FATAL(
+            "Dense after compressed not currently supported");
+    } else if (isSingletonDLT(dlt)) {
       // Singleton after Compressed causes no problems for allocating
       // `nnz` nor for the yieldPos loop. This remains true even
       // when adding support for multiple compressed dimensions or
       // for dense-after-compressed.
-      break;
-    default:
-      MLIR_SPARSETENSOR_FATAL("unsupported dimension level type");
+    } else {
+      MLIR_SPARSETENSOR_FATAL("unsupported dimension level type: %d\n",
+                              static_cast<uint8_t>(dlt));
     }
     sz = detail::checkedMul(sz, dimSizes[r]);
   }
@@ -65,7 +66,7 @@
 void SparseTensorNNZ::forallIndices(uint64_t stopDim,
                                     SparseTensorNNZ::NNZConsumer yield) const {
   assert(stopDim < getRank() && "Dimension out of bounds");
-  assert(dimTypes[stopDim] == DimLevelType::kCompressed &&
+  assert(isCompressedDLT(dimTypes[stopDim]) &&
          "Cannot look up non-compressed dimensions");
   forallIndices(yield, stopDim, 0, 0);
 }
@@ -78,7 +79,7 @@
 void SparseTensorNNZ::add(const std::vector<uint64_t> &ind) {
   uint64_t parentPos = 0;
   for (uint64_t rank = getRank(), r = 0; r < rank; ++r) {
-    if (dimTypes[r] == DimLevelType::kCompressed)
+    if (isCompressedDLT(dimTypes[r]))
       nnz[r][parentPos]++;
     parentPos = parentPos * dimSizes[r] + ind[r];
   }
diff --git a/mlir/test/Dialect/SparseTensor/conversion_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/conversion_sparse2sparse.mlir
--- a/mlir/test/Dialect/SparseTensor/conversion_sparse2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion_sparse2sparse.mlir
@@ -47,3 +47,45 @@
   %0 = sparse_tensor.convert %arg0 : tensor<?xf32, #SparseVector64> to tensor<?xf32, #SparseVector32>
   return %0 : tensor<?xf32, #SparseVector32>
 }
+
+#SparseSingleton64 = #sparse_tensor.encoding<{
+  dimLevelType = ["singleton"],
+  pointerBitWidth = 64,
+  indexBitWidth = 64
+}>
+
+#SparseSingleton32 = #sparse_tensor.encoding<{
+  dimLevelType = ["singleton"],
+  pointerBitWidth = 32,
+  indexBitWidth = 32
+}>
+
+// CHECK-COO-LABEL: func @sparse_convert_singleton(
+// CHECK-COO-SAME: %[[A:.*]]: !llvm.ptr<i8>)
+// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32
+// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32
+// CHECK-COO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8>
+// CHECK-COO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-COO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-COO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref<?xi8>
+// CHECK-COO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref<?xindex>
+// CHECK-COO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref<?xindex>
+// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]])
+// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]])
+// CHECK-COO: call @delSparseTensorCOOF32(%[[C]])
+// CHECK-COO: return %[[T]] : !llvm.ptr<i8>
+// CHECK-AUTO-LABEL: func @sparse_convert_singleton(
+// CHECK-AUTO-SAME: %[[A:.*]]: !llvm.ptr<i8>)
+// CHECK-AUTO-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32
+// CHECK-AUTO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8>
+// CHECK-AUTO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-AUTO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-AUTO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref<?xi8>
+// CHECK-AUTO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref<?xindex>
+// CHECK-AUTO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref<?xindex>
+// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]])
+// CHECK-AUTO: return %[[T]] : !llvm.ptr<i8>
+func.func @sparse_convert_singleton(%arg0: tensor<?xf32, #SparseSingleton64>) -> tensor<?xf32, #SparseSingleton32> {
+  %0 = sparse_tensor.convert %arg0 : tensor<?xf32, #SparseSingleton64> to tensor<?xf32, #SparseSingleton32>
+  return %0 : tensor<?xf32, #SparseSingleton32>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
@@ -19,6 +19,20 @@
   dimOrdering = affine_map<(i,j,k) -> (i,k,j)>
 }>
 
+#SingletonTensor1 = #sparse_tensor.encoding<{
+  dimLevelType = [ "dense", "compressed", "singleton" ]
+}>
+
+// This also checks the compressed->dense conversion (when there are zeros).
+#SingletonTensor2 = #sparse_tensor.encoding<{
+  dimLevelType = [ "dense", "dense", "singleton" ]
+}>
+
+// This also checks the singleton->compressed conversion.
+#SingletonTensor3 = #sparse_tensor.encoding<{
+  dimLevelType = [ "dense", "dense", "compressed" ]
+}>
+
 module {
   //
   // Utilities for output and releasing memory.
@@ -36,9 +50,9 @@
   }
 
   //
-  // Main driver.
+  // The first test suite (for non-singleton DimLevelTypes).
   //
-  func.func @entry() {
+  func.func @testNonSingleton() {
     //
     // Initialize a 3-dim dense tensor.
     //
@@ -97,4 +111,81 @@
     return
   }
+
+  //
+  // The second test suite (for singleton DimLevelTypes).
+  //
+  func.func @testSingleton() {
+    //
+    // Initialize a 3-dim dense tensor with the 3rd dim being singleton.
+    //
+    %src = arith.constant dense<[
+      [ [ 1.0, 0.0, 0.0, 0.0 ],
+        [ 0.0, 6.0, 0.0, 0.0 ],
+        [ 0.0, 0.0, 11.0, 0.0 ] ],
+      [ [ 0.0, 14.0, 0.0, 0.0 ],
+        [ 0.0, 0.0, 0.0, 20.0 ],
+        [ 21.0, 0.0, 0.0, 0.0 ] ]
+    ]> : tensor<2x3x4xf64>
+
+    //
+    // Convert dense tensor directly to various sparse tensors.
+    //
+    %s1 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #SingletonTensor1>
+    %s2 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #SingletonTensor2>
+    %s3 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #SingletonTensor3>
+
+    //
+    // Convert sparse tensor directly to another sparse format.
+    //
+    %t12 = sparse_tensor.convert %s1 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64, #SingletonTensor2>
+    %t13 = sparse_tensor.convert %s1 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64, #SingletonTensor3>
+    %t21 = sparse_tensor.convert %s2 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64, #SingletonTensor1>
+    %t23 = sparse_tensor.convert %s2 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64, #SingletonTensor3>
+    %t31 = sparse_tensor.convert %s3 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64, #SingletonTensor1>
+    %t32 = sparse_tensor.convert %s3 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64, #SingletonTensor2>
+
+    //
+    // Convert sparse tensor back to dense.
+    //
+    %d12 = sparse_tensor.convert %t12 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64>
+    %d13 = sparse_tensor.convert %t13 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64>
+    %d21 = sparse_tensor.convert %t21 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64>
+    %d23 = sparse_tensor.convert %t23 : tensor<2x3x4xf64, #SingletonTensor3> to tensor<2x3x4xf64>
+    %d31 = sparse_tensor.convert %t31 : tensor<2x3x4xf64, #SingletonTensor1> to tensor<2x3x4xf64>
+    %d32 = sparse_tensor.convert %t32 : tensor<2x3x4xf64, #SingletonTensor2> to tensor<2x3x4xf64>
+
+    //
+    // Check round-trip equality. And release dense tensors.
+    //
+    // CHECK-COUNT-7: ( ( ( 1, 0, 0, 0 ), ( 0, 6, 0, 0 ), ( 0, 0, 11, 0 ) ), ( ( 0, 14, 0, 0 ), ( 0, 0, 0, 20 ), ( 21, 0, 0, 0 ) ) )
+    call @dump(%src) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d12) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d13) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d21) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d23) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d31) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d32) : (tensor<2x3x4xf64>) -> ()
+
+    //
+    // Release sparse tensors.
+    //
+    bufferization.dealloc_tensor %t12 : tensor<2x3x4xf64, #SingletonTensor2>
+    bufferization.dealloc_tensor %t13 : tensor<2x3x4xf64, #SingletonTensor3>
+    bufferization.dealloc_tensor %t21 : tensor<2x3x4xf64, #SingletonTensor1>
+    bufferization.dealloc_tensor %t23 : tensor<2x3x4xf64, #SingletonTensor3>
+    bufferization.dealloc_tensor %t31 : tensor<2x3x4xf64, #SingletonTensor1>
+    bufferization.dealloc_tensor %t32 : tensor<2x3x4xf64, #SingletonTensor2>
+
+    return
+  }
+
+  //
+  // Main driver.
+  //
+  func.func @entry() {
+    call @testNonSingleton() : () -> ()
+    call @testSingleton() : () -> ()
+    return
+  }
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py
@@ -189,6 +189,7 @@
   # TODO: While direct s2s is far too slow for per-commit testing,
   # we should have some framework ensure that we run this test with
   # `s2s=0` on a regular basis, to ensure that it does continue to work.
+  # TODO: be sure to test s2s=0 together with singletons.
   s2s = 1
   sparsification_options = (
       f'parallelization-strategy=none '
@@ -200,10 +201,12 @@
       options=sparsification_options, opt_level=0, shared_libs=[support_lib])
   f64 = ir.F64Type.get()
   # Be careful about increasing this because
-  #     len(types) = 1 + 2^rank * rank! * len(bitwidths)^2
+  #     len(types) = 1 + len(level_choices)^rank * rank! * len(bitwidths)^2
   shape = range(2, 6)
   rank = len(shape)
   # All combinations.
+  # TODO: add singleton here too, which requires updating how `np_arg0`
+  # is initialized below.
   levels = list(itertools.product(*itertools.repeat(
       [st.DimLevelType.dense, st.DimLevelType.compressed], rank)))
   # All permutations.