diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -530,7 +530,7 @@
   /// constructors. This only does the bare minimum setup for the
   /// other constructors, thus the resulting object is not guaranteed
   /// to be in a valid state. In particular `isCompressedDim` (and
-  /// hence `appendCurrentPointer` etc) will not work.
+  /// hence `appendPointer` etc) will not work.
   ///
   /// Precondition: `perm` and `sparsity` must be valid for `szs.size()`.
   SparseTensorStorage(const std::vector<uint64_t> &szs, const uint64_t *perm,
@@ -538,7 +538,9 @@
       : sizes(szs), rev(getRank()), idx(getRank()), pointers(getRank()),
         indices(getRank()) {
     // Validate parameters.
-    for (uint64_t rank = getRank(), r = 0; r < rank; r++) {
+    uint64_t rank = getRank();
+    assert(rank > 0 && "Rank zero is unsupported");
+    for (uint64_t r = 0; r < rank; r++) {
       assert(sizes[r] > 0 && "Dimension size zero has trivial storage");
       assert((sparsity[r] == DimLevelType::kDense ||
               sparsity[r] == DimLevelType::kCompressed) &&
@@ -770,7 +772,7 @@
   /// Finalizes lexicographic insertions.
   void endInsert() override {
     if (values.empty())
-      endDim(0);
+      finalizeSegment(0);
     else
       endPath(0);
   }
@@ -837,14 +839,6 @@
   }

 private:
-  /// Appends the next free position of `indices[d]` to `pointers[d]`.
-  /// Thus, when called after inserting the last element of a segment,
-  /// it will append the position where the next segment begins.
-  inline void appendCurrentPointer(uint64_t d) {
-    assert(isCompressedDim(d)); // Entails `d < getRank()`.
-    appendPointer(d, indices[d].size());
-  }
-
   /// Appends an arbitrary new position to `pointers[d]`. This method
   /// checks that `pos` is representable in the `P` type; however, it
   /// does not check that `pos` is semantically valid (i.e., larger than
@@ -858,15 +852,34 @@
     pointers[d].push_back(pos); // Here is where we convert to `P`.
   }

-  /// Appends the given index to `indices[d]`. This method checks that
-  /// `i` is representable in the `I` type; however, it does not check
-  /// that `i` is semantically valid (i.e., in bounds for `sizes[d]`
-  /// and not previously occurring in the same segment).
-  inline void appendIndex(uint64_t d, uint64_t i) {
-    assert(isCompressedDim(d)); // Entails `d < getRank()`.
-    assert(i <= std::numeric_limits<I>::max() &&
-           "Index value is too large for the I-type");
-    indices[d].push_back(i); // Here is where we convert to `I`.
+  /// Finalizes the sparse pointer structure at this dimension.
+  void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) {
+    if (count == 0)
+      return; // Short-circuit, since it'll be a nop.
+    assert(d < getRank());
+    if (isCompressedDim(d)) {
+      const uint64_t pos = indices[d].size();
+      // Inlined `appendPointer(d, pos)` called `count` times.
+      assert(pos <= std::numeric_limits<P>::max() &&
+             "Pointer value is too large for the P-type");
+      const P p = pos; // Convert the `uint64_t` to `P`.
+      pointers[d].insert(pointers[d].end(), count, p);
+    } else {
+      const uint64_t sz = sizes[d];
+      assert(sz >= full && "Segment is overfull");
+      // Assuming we checked for overflows in the constructor, this
+      // multiply will never overflow.
+      count *= sz - full;
+      // For dense storage we must enumerate all the remaining coordinates
+      // in this dimension (i.e., coordinates after the last non-zero
+      // element), and either fill in their zero values or else recurse
+      // to finalize some deeper dimension.
+      if (d + 1 == getRank())
+        // TODO: can we replace this with `memset`, or is this good enough?
+        values.insert(values.end(), count, 0);
+      else
+        finalizeSegment(d + 1, 0, count);
+    }
   }

   /// Writes the given coordinate to `indices[d][pos]`. This method
@@ -886,6 +899,32 @@
     indices[d][pos] = i; // Here is where we convert to `I`.
   }

+  /// Appends index `i` to dimension `d`, in the semantically general
+  /// sense. For non-dense dimensions, that means appending to the
+  /// `indices[d]` array, checking that `i` is representable in the `I`
+  /// type; however, we do not verify other semantic requirements (e.g.,
+  /// that `i` is in bounds for `sizes[d]`, and not previously occurring
+  /// in the same segment). For dense dimensions, this method instead
+  /// appends the appropriate number of zeros to the `values` array, where
+  /// `full` is one past the highest index already written for this segment.
+  void appendIndex(uint64_t d, uint64_t full, uint64_t i) {
+    assert(d < getRank());
+    if (isCompressedDim(d)) {
+      assert(i <= std::numeric_limits<I>::max() &&
+             "Index value is too large for the I-type");
+      indices[d].push_back(i); // Here is where we convert `uint64_t` to `I`.
+    } else {
+      assert(i >= full && "Index was already filled");
+      if (i == full)
+        return; // Short-circuit, since it'll be a nop.
+      if (d + 1 == getRank())
+        // TODO: can we replace this with `memset`, or is this good enough?
+        values.insert(values.end(), i - full, 0);
+      else
+        finalizeSegment(d + 1, 0, i - full);
+    }
+  }
+
   /// Initializes sparse tensor storage scheme from a memory-resident sparse
   /// tensor in coordinate scheme. This method prepares the pointers and
   /// indices arrays under the given per-dimension dense/sparse annotations.
@@ -912,42 +951,15 @@
       while (seg < hi && elements[seg].indices[d] == i)
         seg++;
       // Handle segment in interval for sparse or dense dimension.
-      if (isCompressedDim(d)) {
-        appendIndex(d, i);
-      } else {
-        // For dense storage we must fill in all the zero values between
-        // the previous element (when last we ran this for-loop) and the
-        // current element.
-        for (; full < i; full++)
-          endDim(d + 1);
-        full++;
-      }
+      appendIndex(d, full, i);
+      if (!isCompressedDim(d)) // i.e., is `DimLevelType::kDense`
+        full = i + 1;
       fromCOO(elements, lo, seg, d + 1);
       // And move on to next segment in interval.
       lo = seg;
     }
     // Finalize the sparse pointer structure at this dimension.
-    if (isCompressedDim(d)) {
-      appendCurrentPointer(d);
-    } else {
-      // For dense storage we must fill in all the zero values after
-      // the last element.
-      for (uint64_t sz = sizes[d]; full < sz; full++)
-        endDim(d + 1);
-    }
-  }
-
-  /// Ends a deeper, never seen before dimension.
-  void endDim(uint64_t d) {
-    assert(d <= getRank());
-    if (d == getRank()) {
-      values.push_back(0);
-    } else if (isCompressedDim(d)) {
-      appendCurrentPointer(d);
-    } else {
-      for (uint64_t full = 0, sz = sizes[d]; full < sz; full++)
-        endDim(d + 1);
-    }
+    finalizeSegment(d, full);
   }

   /// Wraps up a single insertion path, inner to outer.
@@ -956,12 +968,7 @@
     assert(diff <= rank);
     for (uint64_t i = 0; i < rank - diff; i++) {
       uint64_t d = rank - i - 1;
-      if (isCompressedDim(d)) {
-        appendCurrentPointer(d);
-      } else {
-        for (uint64_t full = idx[d] + 1, sz = sizes[d]; full < sz; full++)
-          endDim(d + 1);
-      }
+      finalizeSegment(d, idx[d] + 1);
     }
   }

@@ -971,12 +978,7 @@
     assert(diff < rank);
    for (uint64_t d = diff; d < rank; d++) {
       uint64_t i = cursor[d];
-      if (isCompressedDim(d)) {
-        appendIndex(d, i);
-      } else {
-        for (uint64_t full = top; full < i; full++)
-          endDim(d + 1);
-      }
+      appendIndex(d, top, i);
       top = 0;
       idx[d] = i;
     }
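
To make the new `appendIndex`/`finalizeSegment` contract concrete, here is a minimal standalone sketch of the rank-2 (dense, compressed) case, i.e. building CSR from lexicographically sorted, duplicate-free COO input. The names `Element`, `CsrBuilder`, `append`, `finalizeRow`, and `skipToRow` are hypothetical simplifications invented for illustration (with `P` = `I` = `uint32_t` and `V` = `double` fixed); they are not part of this patch.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

struct Element {
  uint64_t row, col;
  double value;
};

// Toy CSR builder: dimension 0 is dense, dimension 1 is compressed.
struct CsrBuilder {
  uint64_t rows, cols;
  std::vector<uint32_t> pointers{0}; // `pointers[1]` of the real storage.
  std::vector<uint32_t> indices;     // `indices[1]` of the real storage.
  std::vector<double> values;        // Nonzeros only (dim 1 is compressed).

  CsrBuilder(uint64_t rows, uint64_t cols) : rows(rows), cols(cols) {}

  // Compressed branch of `finalizeSegment`: append the current
  // end-of-indices position to `pointers`, `count` times.
  void finalizeRow(uint64_t count = 1) {
    assert(indices.size() <= UINT32_MAX && "Pointer too large for P-type");
    pointers.insert(pointers.end(), count, uint32_t(indices.size()));
  }

  // Dense branch of `appendIndex` at dimension 0: rows `full..row-1`
  // hold no elements, so finalize each of them in the deeper (compressed)
  // dimension, which just repeats the current pointer value.
  void skipToRow(uint64_t full, uint64_t row) {
    assert(row >= full && "Row was already filled");
    finalizeRow(row - full); // Nop when `row == full`.
  }

  // Compressed branch of `appendIndex` at dimension 1, plus the value.
  void append(uint64_t col, double v) {
    assert(col <= UINT32_MAX && "Index too large for I-type");
    indices.push_back(uint32_t(col));
    values.push_back(v);
  }
};

int main() {
  // 4x5 matrix with nonzeros at (0,2)=1, (2,1)=2, (2,4)=3, already in
  // lexicographic order and without duplicate coordinates.
  std::vector<Element> coo = {{0, 2, 1.0}, {2, 1, 2.0}, {2, 4, 3.0}};
  CsrBuilder b(4, 5);
  uint64_t full = 0; // One past the last row finalized so far.
  for (size_t lo = 0; lo < coo.size();) {
    uint64_t row = coo[lo].row;
    b.skipToRow(full, row); // "Fill in" any empty rows in between.
    size_t seg = lo;        // Scan the segment sharing this row index.
    for (; seg < coo.size() && coo[seg].row == row; ++seg)
      b.append(coo[seg].col, coo[seg].value);
    b.finalizeRow(); // End this row's segment in `pointers`.
    full = row + 1;
    lo = seg;
  }
  b.skipToRow(full, b.rows); // `finalizeSegment(0, full)`: empty tail rows.
  // Expect: pointers = 0 1 1 3 3, indices = 2 1 4, values = 1 2 3.
  for (uint32_t p : b.pointers)
    std::cout << p << ' ';
  std::cout << '\n';
  return 0;
}

The design point the sketch highlights: the old `endDim` recursion appended one pointer or value per call, whereas `finalizeSegment` batches an entire run of empty segments into a single `insert(end, count, value)` on the underlying vector.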