diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
@@ -26,45 +26,48 @@
 namespace mlir {
 namespace sparse_tensor {
 
-/// An element of a sparse tensor in coordinate-scheme representation
-/// (i.e., a pair of coordinates and value). For example, a rank-1
-/// vector element would look like
-///   ({i}, a[i])
-/// and a rank-5 tensor element would look like
-///   ({i,j,k,l,m}, a[i,j,k,l,m])
-///
-/// The coordinates are represented as a (non-owning) pointer into
-/// a shared pool of coordinates, rather than being stored directly in
-/// this object. This significantly improves performance because it:
-/// (1) reduces the per-element memory footprint, and (2) centralizes
-/// the memory management for coordinates. The only downside is that
-/// the coordinates themselves cannot be retrieved without knowing the
-/// rank of the tensor to which this element belongs (and that rank is
-/// not stored in this object).
+/// An element ID is used for retrieving the element's value and coordinates
+/// from the value-array and coordinates-array.
+struct ElementId {
+  ElementId() : id(0) {}
+  explicit ElementId(uint64_t id) : id(id) {}
+  operator uint64_t() const { return id; }
+  uint64_t id;
+};
+
+/// The position of an element is the index of the element-ID in the
+/// element-ID-array.
+using ElementPosition = uint64_t;
+
+/// An element returned by a COO iterator is represented by a pointer to the
+/// coordinates and a value.
 template <typename V>
 struct Element final {
-  Element(const uint64_t *coords, V val) : coords(coords), value(val){};
-  const uint64_t *coords; // pointer into shared coordinates pool
+  Element(const uint64_t *coords, V val) : coords(coords), value(val) {}
+  const uint64_t *coords; // The pointer into a shared coordinates pool.
   V value;
 };
 
-/// Closure object for `operator<` on `Element` with a given rank.
-template <typename V>
+/// Closure object for `operator<` on `ElementId` with a given rank.
 struct ElementLT final {
-  ElementLT(uint64_t rank) : rank(rank) {}
+  ElementLT(const uint64_t *coordinates, uint64_t rank)
+      : coordinates(coordinates), rank(rank) {}
 
   /// Compares two elements a la `operator<`.
   ///
   /// Precondition: the elements must both be valid for `rank`.
-  bool operator()(const Element<V> &e1, const Element<V> &e2) const {
+  bool operator()(ElementId e1, ElementId e2) const {
+    uint64_t const *const coords1 = coordinates + e1 * rank;
+    uint64_t const *const coords2 = coordinates + e2 * rank;
     for (uint64_t d = 0; d < rank; ++d) {
-      if (e1.coords[d] == e2.coords[d])
+      if (coords1[d] == coords2[d])
         continue;
-      return e1.coords[d] < e2.coords[d];
+      return coords1[d] < coords2[d];
    }
     return false;
   }
 
+  const uint64_t *coordinates; // A pointer to the coordinates array.
   const uint64_t rank;
 };
 
@@ -81,6 +84,20 @@
 /// from external formats into memory, or for certain conversions between
 /// different `SparseTensorStorage` formats.
 ///
+/// Elements of a sparse tensor are stored in three arrays: an array for
+/// coordinates, an array for values, and an array for the element IDs. An
+/// element ID is used for retrieving the element's value and coordinates from
+/// the appropriate arrays. The coordinates of an element are stored at indices
+/// [rank*ID .. rank*(ID+1)) in the coordinates-array. The element-ID-array
+/// allows us to sort the elements without having to move the coordinates or
+/// values.
+///
+/// This significantly improves performance because it:
+/// (1) reduces the per-element memory footprint, and (2) centralizes
+/// the memory management for coordinates and values. The only downside is that
+/// the iterator can't simply return a pointer to the underlying storage for the
+/// elements.
+///
 /// This class provides all the typedefs required by the "Container"
 /// concept (<https://en.cppreference.com/w/cpp/named_req/Container>);
 /// however, beware that it cannot fully implement that concept since
@@ -90,17 +107,61 @@
 template <typename V>
 class SparseTensorCOO final {
 public:
+  class Iterator;
   using value_type = const Element<V>;
   using reference = value_type &;
   using const_reference = reference;
-  // The types associated with `std::vector` differ significantly between
-  // C++11/17 vs C++20; so we explicitly defer to whatever `std::vector`
-  // says the types should be.
-  using vector_type = std::vector<Element<V>>;
-  using iterator = typename vector_type::const_iterator;
+  using iterator = Iterator;
   using const_iterator = iterator;
-  using difference_type = typename vector_type::difference_type;
-  using size_type = typename vector_type::size_type;
+  using difference_type = typename iterator::difference_type;
+  using size_type = typename iterator::size_type;
+
+  // An iterator over the elements of the COO in the order given by the
+  // element-ID-array. This can't be an std::iterator because the value type
+  // Element<V> the iterator returns is not a type that is used in the actual
+  // storage. As such, the operator* constructs an object of type Element<V>.
+  class Iterator final {
+  public:
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+
+    Element<V> operator*() const {
+      return Element<V>(coo.getCoords(*iter), coo.getValue(*iter));
+    }
+
+    Iterator &operator++() {
+      ++iter;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      Iterator tmp = *this;
+      ++iter;
+      return tmp;
+    }
+
+    friend bool operator<(const Iterator &a, const Iterator &b) {
+      return a.iter < b.iter;
+    }
+
+    friend bool operator==(const Iterator &a, const Iterator &b) {
+      return a.iter == b.iter;
+    }
+
+    friend bool operator!=(const Iterator &a, const Iterator &b) {
+      return a.iter != b.iter;
+    }
+
+  private:
+    friend class SparseTensorCOO;
+    using ElementIDIterImpl = typename std::vector<ElementId>::const_iterator;
+
+    Iterator(const SparseTensorCOO &coo, ElementIDIterImpl iter)
+        : coo(coo), iter(iter) {}
+
+    const SparseTensorCOO &coo;
+    ElementIDIterImpl iter;
+  };
 
 /// Constructs a new coordinate-scheme sparse tensor with the given
 /// sizes and initial storage capacity.
@@ -131,8 +192,9 @@
     for (uint64_t d = 0; d < dimRank; ++d)
       assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage");
     if (capacity) {
-      elements.reserve(capacity);
+      elementIds.reserve(capacity);
       coordinates.reserve(capacity * dimRank);
+      values.reserve(capacity);
     }
   }
 
@@ -142,11 +204,29 @@
   /// Gets the dimension-sizes array.
   const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
 
-  /// Gets the elements array.
-  const std::vector<Element<V>> &getElements() const { return elements; }
+  /// Returns the number of stored elements.
+  uint64_t getNse() const { return elementIds.size(); }
+
+  /// Returns the value for the element with the given ID.
+  V getValue(ElementId i) const { return values[i]; }
+
+  /// Returns the coordinates for the element with the given ID.
+  const uint64_t *getCoords(ElementId i) const {
+    return &coordinates[i * getRank()];
+  }
+
+  /// Returns the value for the element at the given position.
+  V getValue(ElementPosition n) const { return getValue(elementIds[n]); }
+
+  /// Returns the coordinates for the element at the given position.
+  const uint64_t *getCoords(ElementPosition n) const {
+    return getCoords(elementIds[n]);
+  }
 
   /// Returns the `operator<` closure object for the COO's element type.
-  ElementLT<V> getElementLT() const { return ElementLT<V>(getRank()); }
+  ElementLT getElementLT() const {
+    return ElementLT(coordinates.data(), getRank());
+  }
 
   /// Adds an element to the tensor. This method does not check whether
   /// `dimCoords` is already associated with a value, it adds it regardless.
@@ -159,8 +239,6 @@
   /// * the `dimCoords` is valid for `getRank`.
   /// * the components of `dimCoords` are valid for `getDimSizes`.
   void add(const std::vector<uint64_t> &dimCoords, V val) {
-    const uint64_t *base = coordinates.data();
-    const uint64_t size = coordinates.size();
     const uint64_t dimRank = getRank();
     assert(dimCoords.size() == dimRank && "Element rank mismatch");
     for (uint64_t d = 0; d < dimRank; ++d) {
@@ -168,26 +246,19 @@
              "Coordinate is too large for the dimension");
       coordinates.push_back(dimCoords[d]);
     }
-    // This base only changes if `coordinates` was reallocated. In which
-    // case, we need to correct all previous pointers into the vector.
-    // Note that this only happens if we did not set the initial capacity
-    // right, and then only for every internal vector reallocation (which
-    // with the doubling rule should only incur an amortized linear overhead).
-    const uint64_t *const newBase = coordinates.data();
-    if (newBase != base) {
-      for (uint64_t i = 0, n = elements.size(); i < n; ++i)
-        elements[i].coords = newBase + (elements[i].coords - base);
-      base = newBase;
-    }
-    // Add the new element and update the sorted bit.
-    const Element<V> addedElem(base + size, val);
-    if (!elements.empty() && isSorted)
-      isSorted = getElementLT()(elements.back(), addedElem);
-    elements.push_back(addedElem);
+    values.push_back(val);
+    const ElementId elemId = ElementId(elementIds.size());
+    if (elemId != 0 && isSorted)
+      isSorted = getElementLT()(elementIds.back(), elemId);
+    elementIds.push_back(elemId);
   }
 
-  const_iterator begin() const { return elements.cbegin(); }
-  const_iterator end() const { return elements.cend(); }
+  const_iterator begin() const {
+    return const_iterator(*this, elementIds.cbegin());
+  }
+  const_iterator end() const {
+    return const_iterator(*this, elementIds.cend());
+  }
 
   /// Sorts elements lexicographically by coordinates. If a coordinate
   /// is mapped to multiple values, then the relative order of those
@@ -197,14 +268,17 @@
   void sort() {
     if (isSorted)
       return;
-    std::sort(elements.begin(), elements.end(), getElementLT());
+    std::sort(elementIds.begin(), elementIds.end(), getElementLT());
     isSorted = true;
   }
 
private:
   const std::vector<uint64_t> dimSizes; // per-dimension sizes
-  std::vector<Element<V>> elements;     // all COO elements
+  // All element IDs. When isSorted == true, this array stores the IDs in the
+  // order of the sorted elements.
+  std::vector<ElementId> elementIds;
   std::vector<uint64_t> coordinates;    // shared coordinate pool
+  std::vector<V> values;
   bool isSorted;
 };
 
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -438,9 +438,8 @@
                            const char *filename) {
   assert(filename && "Got nullptr for filename");
   const auto &dimSizes = coo.getDimSizes();
-  const auto &elements = coo.getElements();
   const uint64_t dimRank = coo.getRank();
-  const uint64_t nse = elements.size();
+  const uint64_t nse = coo.getNse();
   std::fstream file;
   file.open(filename, std::ios_base::out | std::ios_base::trunc);
   assert(file.is_open());
@@ -448,11 +447,11 @@
   for (uint64_t d = 0; d < dimRank - 1; ++d)
     file << dimSizes[d] << " ";
   file << dimSizes[dimRank - 1] << std::endl;
-  for (uint64_t i = 0; i < nse; ++i) {
-    const auto &coords = elements[i].coords;
+  for (ElementPosition i = 0; i < nse; ++i) {
+    const auto &coords = coo.getCoords(i);
     for (uint64_t d = 0; d < dimRank; ++d)
       file << (coords[d] + 1) << " ";
-    file << elements[i].value << std::endl;
+    file << coo.getValue(i) << std::endl;
   }
   file.flush();
   file.close();
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -506,7 +506,7 @@
     // TODO: This assertion assumes there are no stored zeros,
     // or if there are then that we don't filter them out.
     // Cf.,
-    assert(coo->getElements().size() == values.size());
+    assert(coo->getNse() == values.size());
     return coo;
   }
 
@@ -590,32 +590,32 @@
  /// coordinates arrays under the given per-level dense/sparse annotations.
  ///
  /// Preconditions:
-  /// * the `lvlElements` must be lexicographically sorted.
+  /// * the elements in `lvlCOO` must be lexicographically sorted.
  /// * the coordinates of every element are valid for `getLvlSizes()`
  ///   (i.e., equal rank and pointwise less-than).
-  void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
-               uint64_t hi, uint64_t l) {
+  void fromCOO(const SparseTensorCOO<V> &lvlCOO, ElementPosition lo,
+               ElementPosition hi, uint64_t l) {
     const uint64_t lvlRank = getLvlRank();
-    assert(l <= lvlRank && hi <= lvlElements.size());
+    assert(l <= lvlRank && hi <= lvlCOO.getNse());
     // Once levels are exhausted, insert the numerical values.
     if (l == lvlRank) {
       assert(lo < hi);
-      values.push_back(lvlElements[lo].value);
+      values.push_back(lvlCOO.getValue(lo));
       return;
     }
     // Visit all elements in this interval.
     uint64_t full = 0;
-    while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
+    while (lo < hi) { // If `hi` is unchanged, then `lo < lvlCOO.getNse()`.
       // Find segment in interval with same coordinate at this level.
-      const uint64_t c = lvlElements[lo].coords[l];
+      const uint64_t c = lvlCOO.getCoords(lo)[l];
       uint64_t seg = lo + 1;
       if (isUniqueLvl(l))
-        while (seg < hi && lvlElements[seg].coords[l] == c)
+        while (seg < hi && lvlCOO.getCoords(seg)[l] == c)
          ++seg;
       // Handle segment in interval for sparse or dense level.
       appendCrd(l, full, c);
       full = c + 1;
-      fromCOO(lvlElements, lo, seg, l + 1);
+      fromCOO(lvlCOO, lo, seg, l + 1);
       // And move on to next segment in interval.
       lo = seg;
     }
@@ -1045,10 +1045,9 @@
     // using `lvlSizes = lvlCOO.getDimSizes()` in the ctor above.)
     lvlCOO.sort();
-    // Now actually insert the `elements`.
-    const auto &elements = lvlCOO.getElements();
-    const uint64_t nse = elements.size();
+    // Now actually insert the elements.
+    const uint64_t nse = lvlCOO.getNse();
     values.reserve(nse);
-    fromCOO(elements, 0, nse, 0);
+    fromCOO(lvlCOO, 0, nse, 0);
   }
 
 template <typename V>
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -100,8 +100,10 @@
   SparseTensorIterator &operator=(const SparseTensorIterator &) = delete;
 
   /// Gets the next element. If there are no remaining elements, then
-  /// returns nullptr.
-  const Element<V> *getNext() { return it < end ? &*it++ : nullptr; }
+  /// returns std::nullopt.
+  std::optional<Element<V>> getNext() {
+    return it < end ? std::optional<Element<V>>(*it++) : std::nullopt;
+  }
 
private:
   const SparseTensorCOO<V> *const coo; // Owning pointer.
@@ -187,8 +189,7 @@
   SparseTensorCOO<V> *coo =
       tensor->toCOO(dimRank, dimSizes.data(), dimRank, identityPerm.data());
 
-  const std::vector<Element<V>> &elements = coo->getElements();
-  const uint64_t nse = elements.size();
+  const uint64_t nse = coo->getNse();
 
   const auto &cooSizes = coo->getDimSizes();
   assert(cooSizes.size() == dimRank && "Rank mismatch");
@@ -200,10 +201,10 @@
   V *values = new V[nse];
   uint64_t *coordinates = new uint64_t[dimRank * nse];
 
-  for (uint64_t i = 0, base = 0; i < nse; ++i) {
-    values[i] = elements[i].value;
+  for (ElementPosition i = 0, base = 0; i < nse; ++i) {
+    values[i] = coo->getValue(i);
     for (uint64_t d = 0; d < dimRank; ++d)
-      coordinates[base + d] = elements[i].coords[d];
+      coordinates[base + d] = coo->getCoords(i)[d];
     base += dimRank;
   }
 
@@ -540,9 +541,9 @@
     index_type *coords = MEMREF_GET_PAYLOAD(cref);                             \
     V *value = MEMREF_GET_PAYLOAD(vref);                                       \
     const uint64_t rank = MEMREF_GET_USIZE(cref);                              \
-    const Element<V> *elem =                                                   \
-        static_cast<SparseTensorIterator<V> *>(iter)->getNext();               \
-    if (elem == nullptr)                                                       \
+    const auto elem =                                                          \
+        static_cast<SparseTensorIterator<V> *>(iter)->getNext();               \
+    if (!elem.has_value())                                                     \
      return false;                                                            \
    for (uint64_t d = 0; d < rank; d++)                                        \
      coords[d] = elem->coords[d];                                             \