diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
@@ -26,45 +26,40 @@
 namespace mlir {
 namespace sparse_tensor {
 
-/// An element of a sparse tensor in coordinate-scheme representation
-/// (i.e., a pair of coordinates and value). For example, a rank-1
-/// vector element would look like
-///   ({i}, a[i])
-/// and a rank-5 tensor element would look like
-///   ({i,j,k,l,m}, a[i,j,k,l,m])
+/// Elements of a sparse tensor are stored in three arrays: an array of
+/// coordinates, an array of values, and an array of element IDs. The ID of
+/// an element is the index of the element's value in the values array. The
+/// coordinates of an element are stored at indices [ID*rank, ID*rank + rank)
+/// in the coordinates array. The element-ID array allows us to sort the
+/// elements without having to move the coordinates or the values.
 ///
-/// The coordinates are represented as a (non-owning) pointer into
-/// a shared pool of coordinates, rather than being stored directly in
-/// this object. This significantly improves performance because it:
+/// This significantly improves performance because it:
 /// (1) reduces the per-element memory footprint, and (2) centralizes
-/// the memory management for coordinates. The only downside is that
-/// the coordinates themselves cannot be retrieved without knowing the
-/// rank of the tensor to which this element belongs (and that rank is
-/// not stored in this object).
-template <typename V>
-struct Element final {
-  Element(const uint64_t *coords, V val) : coords(coords), value(val){};
-  const uint64_t *coords; // pointer into shared coordinates pool
-  V value;
-};
+/// the memory management for coordinates and values. The only downside is
+/// that we can no longer return an element of the tensor that contains both
+/// the coordinates and the value. Instead, the coordinates and the value of
+/// an element are retrieved through the getCoords and getValue member
+/// functions of the sparse tensor representation.
+using ElementId = uint64_t;
 
 /// Closure object for `operator<` on `Element` with a given rank.
-template <typename V>
 struct ElementLT final {
-  ElementLT(uint64_t rank) : rank(rank) {}
+  ElementLT(const uint64_t *coords, uint64_t rank)
+      : coords(coords), rank(rank) {}
 
   /// Compares two elements a la `operator<`.
   ///
   /// Precondition: the elements must both be valid for `rank`.
-  bool operator()(const Element<V> &e1, const Element<V> &e2) const {
+  bool operator()(const ElementId e1, const ElementId e2) const {
     for (uint64_t d = 0; d < rank; ++d) {
-      if (e1.coords[d] == e2.coords[d])
+      if (coords[e1 * rank + d] == coords[e2 * rank + d])
         continue;
-      return e1.coords[d] < e2.coords[d];
+      return coords[e1 * rank + d] < coords[e2 * rank + d];
     }
     return false;
   }
 
+  const uint64_t *coords; // Pointer to the coordinates array.
   const uint64_t rank;
 };
 
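To see the new comparison scheme in isolation: `ElementLT` no longer compares two `Element` records, it compares two element IDs by indexing into the shared coordinates pool. Below is a minimal standalone sketch of the same indirect comparison; the names `pool`, `ids`, and `lt` are invented for this illustration and do not appear in the patch.

    // Sort element IDs through a flat coordinate pool, mirroring the
    // arithmetic in ElementLT::operator(). Rank-2 tensor, three elements.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const uint64_t rank = 2;
      // Flattened coordinates of elements 0, 1, 2: (1,2), (0,5), (1,0).
      std::vector<uint64_t> pool = {1, 2, 0, 5, 1, 0};
      std::vector<uint64_t> ids = {0, 1, 2};
      auto lt = [&](uint64_t e1, uint64_t e2) {
        for (uint64_t d = 0; d < rank; ++d) {
          if (pool[e1 * rank + d] == pool[e2 * rank + d])
            continue;
          return pool[e1 * rank + d] < pool[e2 * rank + d];
        }
        return false;
      };
      std::sort(ids.begin(), ids.end(), lt);
      for (uint64_t e : ids) // prints "1 2 0": (0,5) < (1,0) < (1,2)
        std::printf("%llu ", (unsigned long long)e);
    }

Sorting `ids` moves only 8-byte integers; the coordinates themselves stay put, which is exactly the point of the element-ID indirection described in the new header comment.
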
@@ -90,18 +85,17 @@
 template <typename V>
 class SparseTensorCOO final {
 public:
-  using value_type = const Element<V>;
+  using value_type = const ElementId;
   using reference = value_type &;
   using const_reference = reference;
   // The types associated with `std::vector` differ significantly between
   // C++11/17 vs C++20; so we explicitly defer to whatever `std::vector`
   // says the types should be.
-  using vector_type = std::vector<Element<V>>;
+  using vector_type = std::vector<ElementId>;
   using iterator = typename vector_type::const_iterator;
   using const_iterator = iterator;
   using difference_type = typename vector_type::difference_type;
   using size_type = typename vector_type::size_type;
-
   /// Constructs a new coordinate-scheme sparse tensor with the given
   /// sizes and initial storage capacity.
   ///
@@ -131,8 +125,9 @@
     for (uint64_t d = 0; d < dimRank; ++d)
       assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage");
     if (capacity) {
-      elements.reserve(capacity);
+      elementIds.reserve(capacity);
       coordinates.reserve(capacity * dimRank);
+      values.reserve(capacity);
     }
   }
 
@@ -142,11 +137,22 @@
   /// Gets the dimension-sizes array.
   const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
 
-  /// Gets the elements array.
-  const std::vector<Element<V>> &getElements() const { return elements; }
+  uint64_t getNse() const { return elementIds.size(); }
+
+  V getValueForId(uint64_t i) const { return values[i]; }
+  const uint64_t *getCoordsForId(uint64_t i) const {
+    return &coordinates.data()[i * getRank()];
+  }
+
+  V getValue(uint64_t i) const { return getValueForId(elementIds[i]); }
+  const uint64_t *getCoords(uint64_t i) const {
+    return getCoordsForId(elementIds[i]);
+  }
 
   /// Returns the `operator<` closure object for the COO's element type.
-  ElementLT<V> getElementLT() const { return ElementLT<V>(getRank()); }
+  ElementLT getElementLT() const {
+    return ElementLT(coordinates.data(), getRank());
+  }
 
   /// Adds an element to the tensor. This method does not check whether
   /// `dimCoords` is already associated with a value, it adds it regardless.
@@ -159,8 +165,6 @@
   /// * the `dimCoords` is valid for `getRank`.
   /// * the components of `dimCoords` are valid for `getDimSizes`.
   void add(const std::vector<uint64_t> &dimCoords, V val) {
-    const uint64_t *base = coordinates.data();
-    const uint64_t size = coordinates.size();
    const uint64_t dimRank = getRank();
    assert(dimCoords.size() == dimRank && "Element rank mismatch");
    for (uint64_t d = 0; d < dimRank; ++d) {
@@ -168,26 +172,15 @@
       assert(dimCoords[d] < dimSizes[d] &&
              "Coordinate is too large for the dimension");
       coordinates.push_back(dimCoords[d]);
     }
-    // This base only changes if `coordinates` was reallocated. In which
-    // case, we need to correct all previous pointers into the vector.
-    // Note that this only happens if we did not set the initial capacity
-    // right, and then only for every internal vector reallocation (which
-    // with the doubling rule should only incur an amortized linear overhead).
-    const uint64_t *const newBase = coordinates.data();
-    if (newBase != base) {
-      for (uint64_t i = 0, n = elements.size(); i < n; ++i)
-        elements[i].coords = newBase + (elements[i].coords - base);
-      base = newBase;
-    }
-    // Add the new element and update the sorted bit.
-    const Element<V> addedElem(base + size, val);
-    if (!elements.empty() && isSorted)
-      isSorted = getElementLT()(elements.back(), addedElem);
-    elements.push_back(addedElem);
+    values.push_back(val);
+    const uint64_t size = elementIds.size();
+    if (!elementIds.empty() && isSorted)
+      isSorted = getElementLT()(elementIds.back(), size);
+    elementIds.push_back(size);
   }
 
-  const_iterator begin() const { return elements.cbegin(); }
-  const_iterator end() const { return elements.cend(); }
+  const_iterator begin() const { return elementIds.cbegin(); }
+  const_iterator end() const { return elementIds.cend(); }
   /// Sorts elements lexicographically by coordinates. If a coordinate
   /// is mapped to multiple values, then the relative order of those
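The accessors added above come in two layers: `getValueForId` and `getCoordsForId` take a storage ID directly, while `getValue` and `getCoords` take a position in element order and resolve it through `elementIds` first. A small worked sketch of that double indirection follows; all names are invented, only the indexing scheme mirrors the patch.

    // Position -> ID -> storage: after a sort, `ids` is a permutation and
    // the coordinate/value arrays remain in insertion order.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const uint64_t rank = 2;
      std::vector<uint64_t> coords = {1, 2, 0, 5, 1, 0}; // 3 flat elements
      std::vector<double> values = {10.0, 20.0, 30.0};   // values[id]
      std::vector<uint64_t> ids = {1, 2, 0};             // sorted permutation
      for (uint64_t i = 0; i < ids.size(); ++i) {
        const uint64_t *c = &coords[ids[i] * rank]; // cf. getCoords(i)
        double v = values[ids[i]];                  // cf. getValue(i)
        std::printf("(%llu, %llu) -> %g\n", (unsigned long long)c[0],
                    (unsigned long long)c[1], v);
      }
      // Prints: (0, 5) -> 20, then (1, 0) -> 30, then (1, 2) -> 10.
    }

Note also how `add` keeps the `isSorted` bit cheap to maintain: the new element's ID is simply the current size, so a single comparison against `elementIds.back()` suffices.
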
@@ -197,14 +190,17 @@
   void sort() {
     if (isSorted)
       return;
-    std::sort(elements.begin(), elements.end(), getElementLT());
+    std::sort(elementIds.begin(), elementIds.end(), getElementLT());
     isSorted = true;
   }
 
 private:
   const std::vector<uint64_t> dimSizes; // per-dimension sizes
-  std::vector<Element<V>> elements;     // all COO elements
+  // All COO element IDs. When isSorted == true, this array gives the IDs in
+  // the order of the sorted elements.
+  std::vector<ElementId> elementIds;
   std::vector<uint64_t> coordinates;    // shared coordinate pool
+  std::vector<V> values;
   bool isSorted;
 };
 
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -438,9 +438,8 @@
                            const char *filename) {
   assert(filename && "Got nullptr for filename");
   const auto &dimSizes = coo.getDimSizes();
-  const auto &elements = coo.getElements();
   const uint64_t dimRank = coo.getRank();
-  const uint64_t nse = elements.size();
+  const uint64_t nse = coo.getNse();
   std::fstream file;
   file.open(filename, std::ios_base::out | std::ios_base::trunc);
   assert(file.is_open());
@@ -449,10 +448,10 @@
     file << dimSizes[d] << " ";
   file << dimSizes[dimRank - 1] << std::endl;
   for (uint64_t i = 0; i < nse; ++i) {
-    const auto &coords = elements[i].coords;
+    const auto &coords = coo.getCoords(i);
     for (uint64_t d = 0; d < dimRank; ++d)
       file << (coords[d] + 1) << " ";
-    file << elements[i].value << std::endl;
+    file << coo.getValue(i) << std::endl;
   }
   file.flush();
   file.close();
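The `writeExtFROSTT` change above is representative of how read-side clients migrate: `getElements()` disappears in favor of `getNse()` plus the position-based accessors. Below is a hedged sketch of the same loop shape against the patched class; it assumes an MLIR build where this header is on the include path and a `(dimSizes, capacity)` constructor as documented earlier in COO.h.

    // Build a tiny rank-2 COO with the patched API and dump it in the
    // 1-based FROSTT element layout that writeExtFROSTT emits.
    #include "mlir/ExecutionEngine/SparseTensor/COO.h"
    #include <cstdint>
    #include <cstdio>

    using mlir::sparse_tensor::SparseTensorCOO;

    int main() {
      SparseTensorCOO<double> coo(/*dimSizes=*/{4, 6}, /*capacity=*/2);
      coo.add({1, 2}, 1.0);
      coo.add({0, 5}, 2.0);
      coo.sort(); // permutes elementIds only; coords/values stay in place
      for (uint64_t i = 0, n = coo.getNse(); i < n; ++i) {
        const uint64_t *c = coo.getCoords(i);
        std::printf("%llu %llu %g\n", (unsigned long long)(c[0] + 1),
                    (unsigned long long)(c[1] + 1), coo.getValue(i));
      }
      // Expected output: "1 6 2" then "2 3 1".
    }
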
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -506,7 +506,7 @@
     // TODO: This assertion assumes there are no stored zeros,
     // or if there are then that we don't filter them out.
     // Cf.,
-    assert(coo->getElements().size() == values.size());
+    assert(coo->getNse() == values.size());
     return coo;
   }
 
@@ -590,32 +590,32 @@
   /// coordinates arrays under the given per-level dense/sparse annotations.
   ///
   /// Preconditions:
-  /// * the `lvlElements` must be lexicographically sorted.
+  /// * the elements in `lvlCOO` must be lexicographically sorted.
   /// * the coordinates of every element are valid for `getLvlSizes()`
   ///   (i.e., equal rank and pointwise less-than).
-  void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
-               uint64_t hi, uint64_t l) {
+  void fromCOO(const SparseTensorCOO<V> &lvlCOO, uint64_t lo, uint64_t hi,
+               uint64_t l) {
     const uint64_t lvlRank = getLvlRank();
-    assert(l <= lvlRank && hi <= lvlElements.size());
+    assert(l <= lvlRank && hi <= lvlCOO.getNse());
     // Once levels are exhausted, insert the numerical values.
     if (l == lvlRank) {
       assert(lo < hi);
-      values.push_back(lvlElements[lo].value);
+      values.push_back(lvlCOO.getValue(lo));
       return;
     }
     // Visit all elements in this interval.
     uint64_t full = 0;
     while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
       // Find segment in interval with same coordinate at this level.
-      const uint64_t c = lvlElements[lo].coords[l];
+      const uint64_t c = lvlCOO.getCoords(lo)[l];
       uint64_t seg = lo + 1;
       if (isUniqueLvl(l))
-        while (seg < hi && lvlElements[seg].coords[l] == c)
+        while (seg < hi && lvlCOO.getCoords(seg)[l] == c)
           ++seg;
       // Handle segment in interval for sparse or dense level.
       appendCrd(l, full, c);
       full = c + 1;
-      fromCOO(lvlElements, lo, seg, l + 1);
+      fromCOO(lvlCOO, lo, seg, l + 1);
       // And move on to next segment in interval.
       lo = seg;
     }
@@ -1045,10 +1045,9 @@
   // using `lvlSizes = lvlCOO.getDimSizes()` in the ctor above.)
   lvlCOO.sort();
   // Now actually insert the `elements`.
-  const auto &elements = lvlCOO.getElements();
-  const uint64_t nse = elements.size();
+  const uint64_t nse = lvlCOO.getNse();
   values.reserve(nse);
-  fromCOO(elements, 0, nse, 0);
+  fromCOO(lvlCOO, 0, nse, 0);
 }
 
 template <typename P, typename C, typename V>
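The recursion in `fromCOO` above is easier to follow on flat data. The toy program below reproduces the `seg` scan that groups equal coordinates at one level into a single stored coordinate; names are invented, and the real code runs the scan only when `isUniqueLvl(l)` holds and also handles dense levels, both of which are elided here.

    // Segment scan over the interval [lo, hi) of sorted elements: equal
    // coordinates at the current level form one segment, and the real code
    // appends one coordinate per segment, then recurses into [lo, seg).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint64_t> crd = {0, 0, 2, 2, 3}; // level-0 coords, sorted
      uint64_t lo = 0, hi = crd.size();
      while (lo < hi) {
        const uint64_t c = crd[lo];
        uint64_t seg = lo + 1;
        while (seg < hi && crd[seg] == c) // extend segment while coord == c
          ++seg;
        std::printf("coord %llu covers elements [%llu, %llu)\n",
                    (unsigned long long)c, (unsigned long long)lo,
                    (unsigned long long)seg);
        lo = seg;
      }
      // Prints segments [0,2), [2,4), [4,5) for coords 0, 2, 3.
    }
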
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -91,7 +91,8 @@
   /// callers must not free the underlying COO object, since the iterator's
   /// dtor will do so.
   explicit SparseTensorIterator(const SparseTensorCOO<V> *coo)
-      : coo(coo), it(coo->begin()), end(coo->end()) {}
+      : coo(coo), curr_it(coo->begin()), next_it(coo->begin()),
+        end_it(coo->end()) {}
 
   ~SparseTensorIterator() { delete coo; }
 
@@ -99,14 +100,29 @@
   SparseTensorIterator(const SparseTensorIterator &) = delete;
   SparseTensorIterator &operator=(const SparseTensorIterator &) = delete;
 
-  /// Gets the next element. If there are no remaining elements, then
-  /// returns nullptr.
-  const Element<V> *getNext() { return it < end ? &*it++ : nullptr; }
+  /// Moves the iterator to the next element and returns the iterator. If
+  /// there are no remaining elements, then returns nullptr.
+  const SparseTensorIterator *getNext() {
+    if (next_it < end_it) {
+      curr_it = next_it;
+      next_it++;
+      return this;
+    }
+
+    return nullptr;
+  }
+
+  /// Gets the value of the current element.
+  V getValue() const { return coo->getValueForId(*curr_it); }
+
+  /// Gets a pointer to the coordinates of the current element.
+  const uint64_t *getCoords() const { return coo->getCoordsForId(*curr_it); }
 
 private:
   const SparseTensorCOO<V> *const coo; // Owning pointer.
-  typename SparseTensorCOO<V>::const_iterator it;
-  const typename SparseTensorCOO<V>::const_iterator end;
+  typename SparseTensorCOO<V>::const_iterator curr_it;
+  typename SparseTensorCOO<V>::const_iterator next_it;
+  const typename SparseTensorCOO<V>::const_iterator end_it;
 };
 
 // TODO: When using this library from MLIR, the `toMLIRSparseTensor`/
@@ -187,8 +203,7 @@
   SparseTensorCOO<V> *coo =
       tensor->toCOO(dimRank, dimSizes.data(), dimRank, identityPerm.data());
 
-  const std::vector<Element<V>> &elements = coo->getElements();
-  const uint64_t nse = elements.size();
+  const uint64_t nse = coo->getNse();
 
   const auto &cooSizes = coo->getDimSizes();
   assert(cooSizes.size() == dimRank && "Rank mismatch");
@@ -201,9 +216,9 @@
   uint64_t *coordinates = new uint64_t[dimRank * nse];
 
   for (uint64_t i = 0, base = 0; i < nse; ++i) {
-    values[i] = elements[i].value;
+    values[i] = coo->getValue(i);
     for (uint64_t d = 0; d < dimRank; ++d)
-      coordinates[base + d] = elements[i].coords[d];
+      coordinates[base + d] = coo->getCoords(i)[d];
     base += dimRank;
   }
 
@@ -540,13 +555,13 @@
     index_type *coords = MEMREF_GET_PAYLOAD(cref);                            \
     V *value = MEMREF_GET_PAYLOAD(vref);                                      \
     const uint64_t rank = MEMREF_GET_USIZE(cref);                             \
-    const Element<V> *elem =                                                  \
-        static_cast<SparseTensorIterator<V> *>(iter)->getNext();              \
+    const auto *elem =                                                        \
+        static_cast<SparseTensorIterator<V> *>(iter)->getNext();              \
     if (elem == nullptr)                                                      \
       return false;                                                           \
     for (uint64_t d = 0; d < rank; d++)                                       \
-      coords[d] = elem->coords[d];                                            \
-    *value = elem->value;                                                     \
+      coords[d] = elem->getCoords()[d];                                       \
+    *value = elem->getValue();                                                \
    return true;                                                              \
  }
MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT)
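The rewritten iterator's contract is that `getNext()` advances and returns `this`, or returns nullptr once the elements are drained, while `getValue()` and `getCoords()` read the element under `curr_it`; the IMPL_GETNEXT macro above is its in-tree caller. A minimal self-contained mock of that two-cursor control flow follows; the `Iter` class and its data are invented for this sketch and are not part of the runtime.

    // Mock of the curr/next cursor pattern: getNext() commits `next` into
    // `curr` and hands back a handle whose accessors stay valid until the
    // next call, matching SparseTensorIterator's contract.
    #include <cstdio>
    #include <vector>

    class Iter {
    public:
      explicit Iter(const std::vector<int> &v)
          : curr(v.begin()), next(v.begin()), last(v.end()) {}
      const Iter *getNext() {
        if (next != last) {
          curr = next;
          ++next;
          return this;
        }
        return nullptr;
      }
      int getValue() const { return *curr; }

    private:
      std::vector<int>::const_iterator curr, next, last;
    };

    int main() {
      std::vector<int> data = {7, 8, 9};
      Iter it(data);
      while (const Iter *e = it.getNext()) // prints "7 8 9"
        std::printf("%d ", e->getValue());
    }

Keeping a separate `next_it` is what lets `getNext()` return a stable handle instead of a pointer into the now-removed `Element` array.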