diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
@@ -54,7 +54,7 @@
 /// by indices before passing it back to the client (most packed storage
 /// formats require the elements to appear in lexicographic index order).
 template <typename V>
-struct SparseTensorCOO final {
+class SparseTensorCOO final {
 public:
   SparseTensorCOO(const std::vector<uint64_t> &dimSizes, uint64_t capacity)
       : dimSizes(dimSizes) {
@@ -64,6 +64,33 @@
     }
   }
 
+  /// Factory method. Permutes the original dimensions according to
+  /// the given ordering and expects subsequent add() calls to honor
+  /// that same ordering for the given indices. The result is a
+  /// fully permuted coordinate scheme.
+  ///
+  /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
+  static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
+                                                const uint64_t *dimSizes,
+                                                const uint64_t *perm,
+                                                uint64_t capacity = 0) {
+    std::vector<uint64_t> permsz(rank);
+    for (uint64_t r = 0; r < rank; r++) {
+      assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage");
+      permsz[perm[r]] = dimSizes[r];
+    }
+    return new SparseTensorCOO<V>(permsz, capacity);
+  }
+
+  /// Get the rank of the tensor.
+  uint64_t getRank() const { return dimSizes.size(); }
+
+  /// Getter for the dimension-sizes array.
+  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
+
+  /// Getter for the elements array.
+  const std::vector<Element<V>> &getElements() const { return elements; }
+
   /// Adds element as indices and value.
   void add(const std::vector<uint64_t> &ind, V val) {
     assert(!iteratorLocked && "Attempt to add() after startIterator()");
@@ -107,15 +134,6 @@
     });
   }
 
-  /// Get the rank of the tensor.
-  uint64_t getRank() const { return dimSizes.size(); }
-
-  /// Getter for the dimension-sizes array.
-  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
-
-  /// Getter for the elements array.
-  const std::vector<Element<V>> &getElements() const { return elements; }
-
   /// Switch into iterator mode.
   void startIterator() {
     iteratorLocked = true;
@@ -131,24 +149,6 @@
     return nullptr;
   }
 
-  /// Factory method. Permutes the original dimensions according to
-  /// the given ordering and expects subsequent add() calls to honor
-  /// that same ordering for the given indices. The result is a
-  /// fully permuted coordinate scheme.
-  ///
-  /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
-  static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
-                                                const uint64_t *dimSizes,
-                                                const uint64_t *perm,
-                                                uint64_t capacity = 0) {
-    std::vector<uint64_t> permsz(rank);
-    for (uint64_t r = 0; r < rank; r++) {
-      assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage");
-      permsz[perm[r]] = dimSizes[r];
-    }
-    return new SparseTensorCOO<V>(permsz, capacity);
-  }
-
 private:
   const std::vector<uint64_t> dimSizes; // per-dimension sizes
   std::vector<Element<V>> elements;     // all COO elements
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -41,31 +41,6 @@
 namespace mlir {
 namespace sparse_tensor {
 
-namespace detail {
-
-// TODO: try to unify this with `SparseTensorFile::assertMatchesShape`
-// which is used by `openSparseTensorCOO`. It's easy enough to resolve
-// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier
-// to resolve the presence/absence of `perm` (without introducing extra
-// overhead), so perhaps the code duplication is unavoidable.
-//
-/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping
-/// semantic-order to target-order) are a refinement of the desired `shape`
-/// (in semantic-order).
-///
-/// Precondition: `perm` and `shape` must be valid for `rank`.
-inline void assertPermutedSizesMatchShape(const std::vector<uint64_t> &dimSizes,
-                                          uint64_t rank, const uint64_t *perm,
-                                          const uint64_t *shape) {
-  assert(perm && shape);
-  assert(rank == dimSizes.size() && "Rank mismatch");
-  for (uint64_t r = 0; r < rank; r++)
-    assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) &&
-           "Dimension size mismatch");
-}
-
-} // namespace detail
-
 // This forward decl is sufficient to split `SparseTensorStorageBase` into
 // its own header, but isn't sufficient for `SparseTensorStorage` to join it.
 template <typename V>
@@ -78,6 +53,16 @@
 /// specialization, which the C-API relies on to catch type errors
 /// arising from our use of opaque pointers.
 class SparseTensorStorageBase {
+protected:
+  // Since this class is virtual, we must disallow public copying in
+  // order to avoid "slicing". Since this class has data members,
+  // that means making copying protected.
+  //
+  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
+  // Copy-assignment would be implicitly deleted (because `dimSizes`
+  // is const), so we explicitly delete it for clarity.
+  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;
+
 public:
   /// Constructs a new storage object. The `perm` maps the tensor's
   /// semantic-ordering of dimensions to this object's storage-order.
@@ -150,16 +135,6 @@
   /// Finishes insertion.
   virtual void endInsert() = 0;
 
-protected:
-  // Since this class is virtual, we must disallow public copying in
-  // order to avoid "slicing". Since this class has data members,
-  // that means making copying protected.
-  //
-  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
-  // Copy-assignment would be implicitly deleted (because `dimSizes`
-  // is const), so we explicitly delete it for clarity.
-  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;
-
 private:
   const std::vector<uint64_t> dimSizes;
   std::vector<uint64_t> rev;
@@ -198,42 +173,7 @@
   /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`.
   SparseTensorStorage(const std::vector<uint64_t> &dimSizes,
                       const uint64_t *perm, const DimLevelType *sparsity,
-                      SparseTensorCOO<V> *coo)
-      : SparseTensorStorage(dimSizes, perm, sparsity) {
-    // Provide hints on capacity of pointers and indices.
-    // TODO: needs much fine-tuning based on actual sparsity; currently
-    //       we reserve pointer/index space based on all previous dense
-    //       dimensions, which works well up to first sparse dim; but
-    //       we should really use nnz and dense/sparse distribution.
-    bool allDense = true;
-    uint64_t sz = 1;
-    for (uint64_t r = 0, rank = getRank(); r < rank; r++) {
-      if (isCompressedDim(r)) {
-        // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply
-        // `sz` by that before reserving. (For now we just use 1.)
-        pointers[r].reserve(sz + 1);
-        pointers[r].push_back(0);
-        indices[r].reserve(sz);
-        sz = 1;
-        allDense = false;
-      } else { // Dense dimension.
-        sz = detail::checkedMul(sz, getDimSizes()[r]);
-      }
-    }
-    // Then assign contents from coordinate scheme tensor if provided.
-    if (coo) {
-      // Ensure both preconditions of `fromCOO`.
-      assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch");
-      coo->sort();
-      // Now actually insert the `elements`.
-      const std::vector<Element<V>> &elements = coo->getElements();
-      uint64_t nnz = elements.size();
-      values.reserve(nnz);
-      fromCOO(elements, 0, nnz, 0);
-    } else if (allDense) {
-      values.resize(sz, 0);
-    }
-  }
+                      SparseTensorCOO<V> *coo);
 
   /// Constructs a sparse tensor storage scheme with the given dimensions,
   /// permutation, and per-dimension dense/sparse annotations, using
@@ -246,6 +186,29 @@
                       const uint64_t *perm, const DimLevelType *sparsity,
                       const SparseTensorStorageBase &tensor);
 
+  /// Factory method. Constructs a sparse tensor storage scheme with the given
+  /// dimensions, permutation, and per-dimension dense/sparse annotations,
+  /// using the coordinate scheme tensor for the initial contents if provided.
+  /// In the latter case, the coordinate scheme must respect the same
+  /// permutation as is desired for the new sparse tensor storage.
+  ///
+  /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`.
+  static SparseTensorStorage<P, I, V> *
+  newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+                  const DimLevelType *sparsity, SparseTensorCOO<V> *coo);
+
+  /// Factory method. Constructs a sparse tensor storage scheme with
+  /// the given dimensions, permutation, and per-dimension dense/sparse
+  /// annotations, using the sparse tensor for the initial contents.
+  ///
+  /// Preconditions:
+  /// * `shape`, `perm`, and `sparsity` must be valid for `rank`.
+  /// * The `tensor` must have the same value type `V`.
+  static SparseTensorStorage<P, I, V> *
+  newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+                  const DimLevelType *sparsity,
+                  const SparseTensorStorageBase *source);
+
   ~SparseTensorStorage() final = default;
 
   /// Partially specialize these getter methods based on template types.
@@ -335,55 +298,6 @@
     return coo;
   }
 
-  /// Factory method. Constructs a sparse tensor storage scheme with the given
-  /// dimensions, permutation, and per-dimension dense/sparse annotations,
-  /// using the coordinate scheme tensor for the initial contents if provided.
-  /// In the latter case, the coordinate scheme must respect the same
-  /// permutation as is desired for the new sparse tensor storage.
-  ///
-  /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`.
-  static SparseTensorStorage<P, I, V> *
-  newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
-                  const DimLevelType *sparsity, SparseTensorCOO<V> *coo) {
-    SparseTensorStorage<P, I, V> *n = nullptr;
-    if (coo) {
-      const auto &coosz = coo->getDimSizes();
-      detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape);
-      n = new SparseTensorStorage<P, I, V>(coosz, perm, sparsity, coo);
-    } else {
-      std::vector<uint64_t> permsz(rank);
-      for (uint64_t r = 0; r < rank; r++) {
-        assert(shape[r] > 0 && "Dimension size zero has trivial storage");
-        permsz[perm[r]] = shape[r];
-      }
-      // We pass the null `coo` to ensure we select the intended constructor.
-      n = new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, coo);
-    }
-    return n;
-  }
-
-  /// Factory method. Constructs a sparse tensor storage scheme with
-  /// the given dimensions, permutation, and per-dimension dense/sparse
-  /// annotations, using the sparse tensor for the initial contents.
-  ///
-  /// Preconditions:
-  /// * `shape`, `perm`, and `sparsity` must be valid for `rank`.
-  /// * The `tensor` must have the same value type `V`.
-  static SparseTensorStorage<P, I, V> *
-  newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
-                  const DimLevelType *sparsity,
-                  const SparseTensorStorageBase *source) {
-    assert(source && "Got nullptr for source");
-    SparseTensorEnumeratorBase<V> *enumerator;
-    source->newEnumerator(&enumerator, rank, perm);
-    const auto &permsz = enumerator->permutedSizes();
-    detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape);
-    auto *tensor =
-        new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, *source);
-    delete enumerator;
-    return tensor;
-  }
-
 private:
   /// Appends an arbitrary new position to `pointers[d]`. This method
   /// checks that `pos` is representable in the `P` type; however, it
@@ -751,6 +665,110 @@
   std::vector<std::vector<uint64_t>> nnz;
 };
 
+//===----------------------------------------------------------------------===//
+// Definitions of the ctors and factories of `SparseTensorStorage`.
+
+namespace detail {
+
+// TODO: try to unify this with `SparseTensorFile::assertMatchesShape`
+// which is used by `openSparseTensorCOO`. It's easy enough to resolve
+// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier
+// to resolve the presence/absence of `perm` (without introducing extra
+// overhead), so perhaps the code duplication is unavoidable.
+//
+/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping
+/// semantic-order to target-order) are a refinement of the desired `shape`
+/// (in semantic-order).
+///
+/// Precondition: `perm` and `shape` must be valid for `rank`.
+inline void assertPermutedSizesMatchShape(const std::vector<uint64_t> &dimSizes,
+                                          uint64_t rank, const uint64_t *perm,
+                                          const uint64_t *shape) {
+  assert(perm && shape);
+  assert(rank == dimSizes.size() && "Rank mismatch");
+  for (uint64_t r = 0; r < rank; r++)
+    assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) &&
+           "Dimension size mismatch");
+}
+
+} // namespace detail
+
+template <typename P, typename I, typename V>
+SparseTensorStorage<P, I, V> *SparseTensorStorage<P, I, V>::newSparseTensor(
+    uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+    const DimLevelType *sparsity, SparseTensorCOO<V> *coo) {
+  SparseTensorStorage<P, I, V> *n = nullptr;
+  if (coo) {
+    const auto &coosz = coo->getDimSizes();
+    detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape);
+    n = new SparseTensorStorage<P, I, V>(coosz, perm, sparsity, coo);
+  } else {
+    std::vector<uint64_t> permsz(rank);
+    for (uint64_t r = 0; r < rank; r++) {
+      assert(shape[r] > 0 && "Dimension size zero has trivial storage");
+      permsz[perm[r]] = shape[r];
+    }
+    // We pass the null `coo` to ensure we select the intended constructor.
+    n = new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, coo);
+  }
+  return n;
+}
+
+template <typename P, typename I, typename V>
+SparseTensorStorage<P, I, V> *SparseTensorStorage<P, I, V>::newSparseTensor(
+    uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+    const DimLevelType *sparsity, const SparseTensorStorageBase *source) {
+  assert(source && "Got nullptr for source");
+  SparseTensorEnumeratorBase<V> *enumerator;
+  source->newEnumerator(&enumerator, rank, perm);
+  const auto &permsz = enumerator->permutedSizes();
+  detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape);
+  auto *tensor =
+      new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, *source);
+  delete enumerator;
+  return tensor;
+}
+
+template <typename P, typename I, typename V>
+SparseTensorStorage<P, I, V>::SparseTensorStorage(
+    const std::vector<uint64_t> &dimSizes, const uint64_t *perm,
+    const DimLevelType *sparsity, SparseTensorCOO<V> *coo)
+    : SparseTensorStorage(dimSizes, perm, sparsity) {
+  // Provide hints on capacity of pointers and indices.
+  // TODO: needs much fine-tuning based on actual sparsity; currently
+  //       we reserve pointer/index space based on all previous dense
+  //       dimensions, which works well up to first sparse dim; but
+  //       we should really use nnz and dense/sparse distribution.
+  bool allDense = true;
+  uint64_t sz = 1;
+  for (uint64_t r = 0, rank = getRank(); r < rank; r++) {
+    if (isCompressedDim(r)) {
+      // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply
+      // `sz` by that before reserving. (For now we just use 1.)
+      pointers[r].reserve(sz + 1);
+      pointers[r].push_back(0);
+      indices[r].reserve(sz);
+      sz = 1;
+      allDense = false;
+    } else { // Dense dimension.
+      sz = detail::checkedMul(sz, getDimSizes()[r]);
+    }
+  }
+  // Then assign contents from coordinate scheme tensor if provided.
+  if (coo) {
+    // Ensure both preconditions of `fromCOO`.
+    assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch");
+    coo->sort();
+    // Now actually insert the `elements`.
+    const std::vector<Element<V>> &elements = coo->getElements();
+    uint64_t nnz = elements.size();
+    values.reserve(nnz);
+    fromCOO(elements, 0, nnz, 0);
+  } else if (allDense) {
+    values.resize(sz, 0);
+  }
+}
+
 template <typename P, typename I, typename V>
 SparseTensorStorage<P, I, V>::SparseTensorStorage(
     const std::vector<uint64_t> &dimSizes, const uint64_t *perm,
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -12,34 +12,6 @@
 // operates on sparse tensors. The provided functionality is **not** part
 // of core MLIR, however.
 //
-//===----------------------------------------------------------------------===//
-
-#include "mlir/ExecutionEngine/SparseTensorUtils.h"
-#include "mlir/ExecutionEngine/SparseTensor/COO.h"
-#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h"
-#include "mlir/ExecutionEngine/SparseTensor/File.h"
-#include "mlir/ExecutionEngine/SparseTensor/Storage.h"
-
-#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
-
-#include <algorithm>
-#include <cassert>
-#include <cctype>
-#include <cinttypes>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <functional>
-#include <iostream>
-#include <limits>
-#include <numeric>
-
-using namespace mlir::sparse_tensor;
-
-//===----------------------------------------------------------------------===//
-//
-// Internal support for storing and reading sparse tensors.
-//
 // The following memory-resident sparse storage schemes are supported:
 //
 // (a) A coordinate scheme for temporarily storing and lexicographically
@@ -72,6 +44,28 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/ExecutionEngine/SparseTensorUtils.h"
+#include "mlir/ExecutionEngine/SparseTensor/COO.h"
+#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h"
+#include "mlir/ExecutionEngine/SparseTensor/File.h"
+#include "mlir/ExecutionEngine/SparseTensor/Storage.h"
+
+#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <numeric>
+
+using namespace mlir::sparse_tensor;
+
 namespace {
 
 /// Initializes sparse tensor from an external COO-flavored format.