diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <functional>
 #include
 #include
 #include
@@ -187,6 +188,7 @@
                           const uint64_t *perm, const DimLevelType *sparsity)
       : dimSizes(szs), rev(getRank()),
         dimTypes(sparsity, sparsity + getRank()) {
+    assert(perm && sparsity);
     const uint64_t rank = getRank();
     // Validate parameters.
     assert(rank > 0 && "Trivial shape is unsupported");
@@ -290,6 +292,135 @@
   const std::vector<DimLevelType> dimTypes;
 };
 
+/// This class provides the interface required by `SparseTensorEnumerator`,
+/// and is a superclass of `SparseTensorStorage` for abstracting
+/// over the `<P,I>` templating. We need that abstraction so that direct
+/// sparse-to-sparse conversion need not take separate `<P,I>` parameters
+/// for the source vs the target (nor require that the source parameters
+/// match the target parameters).
+template <typename V>
+class EnumerableSparseTensorStorage : public SparseTensorStorageBase {
+public:
+  EnumerableSparseTensorStorage(const std::vector<uint64_t> &szs,
+                                const uint64_t *perm,
+                                const DimLevelType *sparsity)
+      : SparseTensorStorageBase(szs, perm, sparsity) {}
+
+  /// Looks up the value stored at the given position.
+  virtual V getValue(uint64_t pos) const = 0;
+
+  /// Looks up the `d`-level position/pointer stored at the given
+  /// `d-1`-level position, and converts the stored `P` into `uint64_t`.
+  virtual uint64_t getPointer(uint64_t d, uint64_t parentPos) const = 0;
+
+  /// Looks up the `d`-level coordinate/index stored at the given
+  /// `d`-level position, and converts the stored `I` into `uint64_t`.
+  virtual uint64_t getIndex(uint64_t d, uint64_t pos) const = 0;
+};
+
+/// A (higher-order) function object for enumerating the elements of some
+/// `SparseTensorStorage` under a permutation. That is, the `forallElements`
+/// method encapsulates the loop-nest for enumerating the elements of
+/// the source tensor (in whatever order is best for the source tensor),
+/// and applies a permutation to the coordinates/indices before handing
+/// each element to the callback. A single enumerator object can be
+/// freely reused for several calls to `forallElements`, just so long
+/// as each call is sequential with respect to one another.
+///
+/// N.B., this class stores a reference to the `EnumerableSparseTensorStorage`
+/// passed to the constructor; thus, objects of this class must not
+/// outlive the sparse tensor they depend on.
+template <typename V>
+class SparseTensorEnumerator {
+public:
+  /// Constructs an enumerator with the identity permutation, thus
+  /// enumerating elements with the semantic-ordering of dimensions.
+  explicit SparseTensorEnumerator(
+      const EnumerableSparseTensorStorage<V> &tensor)
+      : src(tensor), permsz(src.getRev().size()), reord(src.getRev()),
+        cursor(getRank()) {
+    const auto &sizes = src.getDimSizes();
+    for (uint64_t rank = getRank(), s = 0; s < rank; s++)
+      permsz[reord[s]] = sizes[s];
+  }
+
+  /// Constructs an enumerator with the given permutation for mapping
+  /// the semantic-ordering of dimensions to the desired target-ordering.
+  ///
+  /// Precondition: `perm` must be valid for `rank`.
+  SparseTensorEnumerator(const EnumerableSparseTensorStorage<V> &tensor,
+                         uint64_t rank, const uint64_t *perm)
+      : src(tensor), permsz(src.getRev().size()), reord(getRank()),
+        cursor(getRank()) {
+    assert(perm);
+    assert(rank == getRank() && "Permutation rank mismatch");
+    const auto &rev = src.getRev();        // source stg-order -> semantic-order
+    const auto &sizes = src.getDimSizes(); // in source storage-order
+    for (uint64_t s = 0; s < rank; s++) {  // `s` source storage-order
+      uint64_t t = perm[rev[s]];           // `t` target-order
+      reord[s] = t;
+      permsz[t] = sizes[s];
+    }
+  }
+
+  SparseTensorEnumerator(const SparseTensorEnumerator &) = delete;
+  SparseTensorEnumerator(SparseTensorEnumerator &&) = delete;
+  SparseTensorEnumerator &operator=(const SparseTensorEnumerator &) = delete;
+  SparseTensorEnumerator &operator=(SparseTensorEnumerator &&) = delete;
+
+  /// Returns the source/target tensor's rank. (The source-rank and
+  /// target-rank are always equal since we only support permutations.
+  /// Though once we add support for other dimension mappings, this
+  /// method will have to be split in two.)
+  inline uint64_t getRank() const { return permsz.size(); }
+
+  /// Returns the target tensor's dimension sizes.
+  inline const std::vector<uint64_t> &permutedSizes() const { return permsz; }
+
+  /// The type of callback functions which receive an element (in target
+  /// order). We avoid packaging the coordinates and value together
+  /// as an `Element` object because this helps keep code somewhat cleaner.
+  typedef const std::function<void(const std::vector<uint64_t> &, V)>
+      &ElementConsumer;
+
+  /// Enumerates all elements of the source tensor, permutes their
+  /// indices, and passes the permuted element to the callback.
+  /// The callback must not store the cursor reference directly, since
+  /// this function reuses the storage. Instead, the callback must copy
+  /// it if they want to keep it.
+  inline void forallElements(ElementConsumer yield) {
+    forallElements(yield, 0, 0);
+  }
+
+private:
+  /// The recursive component of the public `forallElements`.
+  void forallElements(ElementConsumer yield, uint64_t parentPos, uint64_t d) {
+    if (d == getRank()) {
+      // TODO:
+      yield(cursor, src.getValue(parentPos));
+    } else if (src.isCompressedDim(d)) {
+      const uint64_t pstart = src.getPointer(d, parentPos);
+      const uint64_t pstop = src.getPointer(d, parentPos + 1);
+      for (uint64_t pos = pstart; pos < pstop; pos++) {
+        cursor[reord[d]] = src.getIndex(d, pos);
+        forallElements(yield, pos, d + 1);
+      }
+    } else { // Dense dimension.
+      const uint64_t sz = src.getDimSizes()[d];
+      const uint64_t pstart = parentPos * sz;
+      for (uint64_t i = 0; i < sz; i++) {
+        cursor[reord[d]] = i;
+        forallElements(yield, pstart + i, d + 1);
+      }
+    }
+  }
+
+  const EnumerableSparseTensorStorage<V> &src;
+  std::vector<uint64_t> permsz; // in target order.
+  std::vector<uint64_t> reord;  // source storage-order -> target order.
+  std::vector<uint64_t> cursor; // in target order.
+};
+
 /// A memory-resident sparse tensor using a storage scheme based on
 /// per-dimension sparse/dense annotations. This data structure provides a
 /// bufferized form of a sparse tensor type. In contrast to generating setup
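[Editor's note] To make the intended client-side use of the enumerator added above concrete, here is a minimal sketch (not part of the patch). It assumes a rank-3 tensor implementing the `EnumerableSparseTensorStorage<double>` interface; the helper name `collectElements` is hypothetical.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Sketch only: walk a rank-3 tensor under a target ordering and copy out
// all of its elements.  `perm[d]` gives the target position of semantic
// dimension `d`, matching the enumerator's permutation convention.
std::vector<std::pair<std::vector<uint64_t>, double>>
collectElements(const EnumerableSparseTensorStorage<double> &tensor) {
  const uint64_t perm[] = {2, 0, 1};
  SparseTensorEnumerator<double> enumerator(tensor, 3, perm);
  std::vector<std::pair<std::vector<uint64_t>, double>> out;
  // The `ind` reference is the enumerator's reused cursor, so it is copied
  // here (by constructing the pair) rather than stored directly.
  enumerator.forallElements([&](const std::vector<uint64_t> &ind, double val) {
    out.emplace_back(ind, val);
  });
  return out;
}
```

Note that `enumerator.permutedSizes()` would report the dimension sizes in the same target order as the yielded coordinates.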
@@ -297,7 +428,9 @@
 /// a convenient "one-size-fits-all" solution that simply takes an input tensor
 /// and annotations to implement all required setup in a general manner.
 template <typename P, typename I, typename V>
-class SparseTensorStorage : public SparseTensorStorageBase {
+class SparseTensorStorage : public EnumerableSparseTensorStorage<V> {
+  using Base = EnumerableSparseTensorStorage<V>;
+
 public:
   /// Constructs a sparse tensor storage scheme with the given dimensions,
   /// permutation, and per-dimension dense/sparse annotations, using
@@ -307,18 +440,18 @@
   SparseTensorStorage(const std::vector<uint64_t> &szs, const uint64_t *perm,
                       const DimLevelType *sparsity,
                       SparseTensorCOO<V> *coo = nullptr)
-      : SparseTensorStorageBase(szs, perm, sparsity), pointers(getRank()),
-        indices(getRank()), idx(getRank()) {
-    const uint64_t rank = getRank();
+      : Base(szs, perm, sparsity), pointers(Base::getRank()),
+        indices(Base::getRank()), idx(Base::getRank()) {
+    const uint64_t rank = Base::getRank();
     // Provide hints on capacity of pointers and indices.
     // TODO: needs fine-tuning based on sparsity
     bool allDense = true;
     uint64_t sz = 1;
     for (uint64_t r = 0; r < rank; r++) {
-      const uint64_t sz_r = getDimSizes()[r];
+      const uint64_t sz_r = Base::getDimSizes()[r];
       assert(sz_r <= std::numeric_limits<uint64_t>::max() / sz);
       sz *= sz_r;
-      if (isCompressedDim(r)) {
+      if (Base::isCompressedDim(r)) {
         pointers[r].reserve(sz + 1);
         pointers[r].push_back(0);
         indices[r].reserve(sz);
@@ -329,7 +462,7 @@
     // Then assign contents from coordinate scheme tensor if provided.
     if (coo) {
       // Ensure both preconditions of `fromCOO`.
-      assert(coo->getSizes() == Base::getDimSizes() && "Tensor size mismatch");
+      assert(coo->getSizes() == Base::getDimSizes() && "Tensor size mismatch");
      coo->sort();
       // Now actually insert the `elements`.
       const std::vector<Element<V>> &elements = coo->getElements();
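[Editor's note] The mechanical change from `getRank()` to `Base::getRank()` (and similar) throughout this class follows from the new base class depending on the template parameter `V`: members of a dependent base are not found by unqualified lookup inside the derived template. A minimal standalone illustration of the language rule, not taken from the patch:

```cpp
// Members of a base class that depends on a template parameter must be
// reached via a qualified name (or via `this->`).
template <typename T>
struct DependentBase {
  int getRank() const { return 1; }
};

template <typename T>
struct Derived : public DependentBase<T> {
  using Base = DependentBase<T>;
  int f() const {
    // return getRank();     // error: not found during template parsing
    return Base::getRank();  // OK: qualified lookup into the dependent base
  }
};
```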
@@ -345,11 +478,11 @@
   /// Partially specialize these getter methods based on template types.
   void getPointers(std::vector<P> **out, uint64_t d) override {
-    assert(d < getRank());
+    assert(d < Base::getRank());
     *out = &pointers[d];
   }
   void getIndices(std::vector<I> **out, uint64_t d) override {
-    assert(d < getRank());
+    assert(d < Base::getRank());
     *out = &indices[d];
   }
   void getValues(std::vector<V> **out) override { *out = &values; }
 
@@ -378,7 +511,7 @@
     // Sort.
     std::sort(added, added + count);
     // Restore insertion path for first insert.
-    uint64_t rank = getRank();
+    const uint64_t rank = Base::getRank();
     uint64_t index = added[0];
     cursor[rank - 1] = index;
     lexInsert(cursor, values[index]);
@@ -409,24 +542,13 @@
   /// sparse tensor in coordinate scheme with the given dimension order.
   ///
   /// Precondition: `perm` must be valid for `getRank()`.
-  SparseTensorCOO<V> *toCOO(const uint64_t *perm) {
-    // Restore original order of the dimension sizes and allocate coordinate
-    // scheme with desired new ordering specified in perm.
-    const uint64_t rank = getRank();
-    const auto &rev = getRev();
-    const auto &sizes = getDimSizes();
-    std::vector<uint64_t> orgsz(rank);
-    for (uint64_t r = 0; r < rank; r++)
-      orgsz[rev[r]] = sizes[r];
-    SparseTensorCOO<V> *coo = SparseTensorCOO<V>::newSparseTensorCOO(
-        rank, orgsz.data(), perm, values.size());
-    // Populate coordinate scheme restored from old ordering and changed with
-    // new ordering. Rather than applying both reorderings during the recursion,
-    // we compute the combine permutation in advance.
-    std::vector<uint64_t> reord(rank);
-    for (uint64_t r = 0; r < rank; r++)
-      reord[r] = perm[rev[r]];
-    toCOO(*coo, reord, 0, 0);
+  SparseTensorCOO<V> *toCOO(const uint64_t *perm) const {
+    SparseTensorEnumerator<V> enumerator(*this, Base::getRank(), perm);
+    SparseTensorCOO<V> *coo =
+        new SparseTensorCOO<V>(enumerator.permutedSizes(), values.size());
+    enumerator.forallElements([&coo](const std::vector<uint64_t> &ind, V val) {
+      coo->add(ind, val);
+    });
     // TODO: This assertion assumes there are no stored zeros,
     // or if there are then that we don't filter them out.
     // Cf.,
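[Editor's note] A sketch of the end-to-end path the rewritten `toCOO` serves: pack COO input into the annotated storage scheme, then read it back out under a different dimension ordering. The function name `roundTripExample` is hypothetical, and the enumerator names `DimLevelType::kDense`/`kCompressed` are assumed to match the runtime; the constructors and the `newSparseTensorCOO` factory are the ones visible in this patch.

```cpp
#include <cstdint>
#include <vector>

void roundTripExample() {
  const uint64_t rank = 2;
  const uint64_t perm[] = {0, 1};        // identity ordering for construction
  std::vector<uint64_t> sizes = {3, 4};  // a 3x4 matrix
  const DimLevelType sparsity[] = {DimLevelType::kDense,
                                   DimLevelType::kCompressed};

  // Source data in coordinate format (CSR-like annotation above).
  SparseTensorCOO<double> *input =
      SparseTensorCOO<double>::newSparseTensorCOO(rank, sizes.data(), perm, 2);
  input->add({0, 1}, 1.0);
  input->add({2, 3}, 2.0);

  // Pack into the per-dimension pointers/indices/values arrays.
  SparseTensorStorage<uint64_t, uint64_t, double> tensor(sizes, perm, sparsity,
                                                         input);
  delete input; // The storage copies what it needs; it does not take ownership.

  // Read the elements back out transposed: semantic dim 0 -> target dim 1.
  const uint64_t transpose[] = {1, 0};
  SparseTensorCOO<double> *transposed = tensor.toCOO(transpose);
  delete transposed;
}
```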
::max() && "Pointer value is too large for the P-type"); pointers[d].insert(pointers[d].end(), count, static_cast

(pos)); @@ -483,7 +605,7 @@ /// appends the appropriate number of zeros to the `values` array, where /// `full` indicates the highest index already written for this segment. void appendIndex(uint64_t d, uint64_t full, uint64_t i) { - if (isCompressedDim(d)) { + if (Base::isCompressedDim(d)) { assert(i <= std::numeric_limits::max() && "Index value is too large for the I-type"); indices[d].push_back(static_cast(i)); @@ -491,7 +613,7 @@ assert(i >= full && "Index was already filled"); if (i == full) return; // Short-circuit, since it'll be a nop. - if (d + 1 == getRank()) + if (d + 1 == Base::getRank()) values.insert(values.end(), i - full, 0); else finalizeSegment(d + 1, 0, i - full); @@ -508,9 +630,10 @@ /// and pointwise less-than). void fromCOO(const std::vector> &elements, uint64_t lo, uint64_t hi, uint64_t d) { + const uint64_t rank = Base::getRank(); + assert(d <= rank && hi <= elements.size()); // Once dimensions are exhausted, insert the numerical values. - assert(d <= getRank() && hi <= elements.size()); - if (d == getRank()) { + if (d == rank) { assert(lo < hi); values.push_back(elements[lo].value); return; @@ -525,7 +648,7 @@ seg++; // Handle segment in interval for sparse or dense dimension. appendIndex(d, full, i); - if (!isCompressedDim(d)) // i.e., *is* dense. + if (!Base::isCompressedDim(d)) // i.e., *is* dense. full = i + 1; fromCOO(elements, lo, seg, d + 1); // And move on to next segment in interval. @@ -535,39 +658,14 @@ finalizeSegment(d, full); } - /// Stores the sparse tensor storage scheme into a memory-resident sparse - /// tensor in coordinate scheme. - void toCOO(SparseTensorCOO &tensor, std::vector &reord, - uint64_t pos, uint64_t d) { - assert(d <= getRank()); - if (d == getRank()) { - assert(pos < values.size()); - tensor.add(idx, values[pos]); - } else if (isCompressedDim(d)) { - // Sparse dimension. - for (uint64_t ii = pointers[d][pos]; ii < pointers[d][pos + 1]; ii++) { - idx[reord[d]] = indices[d][ii]; - toCOO(tensor, reord, ii, d + 1); - } - } else { - // Dense dimension. - const uint64_t sz = getDimSizes()[d]; - const uint64_t off = pos * sz; - for (uint64_t i = 0; i < sz; i++) { - idx[reord[d]] = i; - toCOO(tensor, reord, off + i, d + 1); - } - } - } - /// Finalize the sparse pointer structure at this dimension. void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) { if (count == 0) return; // Short-circuit, since it'll be a nop. - if (isCompressedDim(d)) { + if (Base::isCompressedDim(d)) { appendPointer(d, indices[d].size(), count); } else { // Dense dimension. - const uint64_t sz = getDimSizes()[d]; + const uint64_t sz = Base::getDimSizes()[d]; assert(sz >= full && "Segment is overfull"); // Assuming we check for overflows in the constructor, then this // multiply will never overflow. @@ -576,7 +674,7 @@ // in this dimension (i.e., coordinates after the last non-zero // element), and either fill in their zero values or else recurse // to finalize some deeper dimension. - if (d + 1 == getRank()) + if (d + 1 == Base::getRank()) values.insert(values.end(), count, 0); else finalizeSegment(d + 1, 0, count); @@ -585,7 +683,7 @@ /// Wraps up a single insertion path, inner to outer. void endPath(uint64_t diff) { - uint64_t rank = getRank(); + const uint64_t rank = Base::getRank(); assert(diff <= rank); for (uint64_t i = 0; i < rank - diff; i++) { const uint64_t d = rank - i - 1; @@ -595,7 +693,7 @@ /// Continues a single insertion path, outer to inner. 
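[Editor's note] A worked illustration (not from the patch) of the per-dimension arrays that `fromCOO`, `appendIndex`, and `finalizeSegment` build, and that the new enumerator later walks. It uses the same 3x4 matrix as the sketch above: dimension 0 dense, dimension 1 compressed, containing (0,1) -> 1.0 and (2,3) -> 2.0.

```cpp
#include <cstdint>
#include <vector>

// Row r's entries occupy positions [pointers[1][r], pointers[1][r+1]) of
// indices[1] and values.  Row 1 is empty, so its pointer segment is empty.
std::vector<uint64_t> expectedPointers1 = {0, 1, 1, 2};
std::vector<uint64_t> expectedIndices1 = {1, 3};  // column of each stored entry
std::vector<double> expectedValues = {1.0, 2.0};  // same order as indices[1]

// Over this layout, forallElements() visits value positions 0 and 1,
// yielding the cursors {0, 1} and {2, 3} (before any permutation).
```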
@@ -595,7 +693,7 @@
 
   /// Continues a single insertion path, outer to inner.
   void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) {
-    uint64_t rank = getRank();
+    const uint64_t rank = Base::getRank();
     assert(diff < rank);
     for (uint64_t d = diff; d < rank; d++) {
       uint64_t i = cursor[d];
@@ -608,7 +706,7 @@
 
   /// Finds the lexicographic differing dimension.
   uint64_t lexDiff(const uint64_t *cursor) const {
-    for (uint64_t r = 0, rank = getRank(); r < rank; r++)
+    for (uint64_t r = 0, rank = Base::getRank(); r < rank; r++)
       if (cursor[r] > idx[r])
         return r;
       else
@@ -617,6 +715,26 @@
     return -1u;
   }
 
+protected:
+  // Allow `SparseTensorEnumerator` to access these methods without
+  // making them public to other client code.
+  friend class SparseTensorEnumerator<V>;
+  V getValue(uint64_t pos) const override {
+    assert(pos < values.size() && "Value position is out of bounds");
+    return values[pos];
+  }
+  uint64_t getPointer(uint64_t d, uint64_t parentPos) const override {
+    assert(Base::isCompressedDim(d)); // Entails `d < getRank()`.
+    assert(parentPos < pointers[d].size() &&
+           "Pointer position is out of bounds");
+    return pointers[d][parentPos]; // Converts the stored `P` into `uint64_t`.
+  }
+  uint64_t getIndex(uint64_t d, uint64_t pos) const override {
+    assert(Base::isCompressedDim(d)); // Entails `d < getRank()`.
+    assert(pos < indices[d].size() && "Index position is out of bounds");
+    return indices[d][pos]; // Converts the stored `I` into `uint64_t`.
+  }
+
 private:
   std::vector<std::vector<P>> pointers;
   std::vector<std::vector<I>> indices;
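[Editor's note] The class comment motivates the `V`-only abstraction with direct sparse-to-sparse conversion, which this patch itself does not implement (it only rewires `toCOO`). The sketch below, using only the interfaces introduced above, shows how a converter could enumerate a source without knowing its `<P,I>` parameters and build a target with different ones. The helper name `convertSparseToSparse`, the intermediate COO, and the parameter conventions are assumptions, not part of the change.

```cpp
#include <cstdint>
#include <vector>

// Hypothetical: convert between storages with different pointer/index types
// (e.g. a 32-bit source and a 64-bit target) via the V-typed enumerator.
template <typename DstP, typename DstI, typename V>
SparseTensorStorage<DstP, DstI, V> *
convertSparseToSparse(const EnumerableSparseTensorStorage<V> &src,
                      uint64_t rank, const uint64_t *perm,
                      const DimLevelType *sparsity) {
  SparseTensorEnumerator<V> enumerator(src, rank, perm);
  // This sketch still stages through a COO in target order; a later change
  // could insert directly into the target storage instead.
  SparseTensorCOO<V> coo(enumerator.permutedSizes(), /*capacity=*/0);
  enumerator.forallElements(
      [&coo](const std::vector<uint64_t> &ind, V val) { coo.add(ind, val); });
  return new SparseTensorStorage<DstP, DstI, V>(enumerator.permutedSizes(),
                                                perm, sparsity, &coo);
}
```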