diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
@@ -26,14 +26,21 @@
 namespace mlir {
 namespace sparse_tensor {

-/// A sparse tensor element in coordinate scheme (value and indices).
-/// For example, a rank-1 vector element would look like
+/// An element of a sparse tensor in coordinate-scheme representation
+/// (i.e., a pair of indices and value). For example, a rank-1 vector
+/// element would look like
 ///   ({i}, a[i])
-/// and a rank-5 tensor element like
+/// and a rank-5 tensor element would look like
 ///   ({i,j,k,l,m}, a[i,j,k,l,m])
-/// We use pointer to a shared index pool rather than e.g. a direct
-/// vector since that (1) reduces the per-element memory footprint, and
-/// (2) centralizes the memory reservation and (re)allocation to one place.
+///
+/// The indices are represented as a (non-owning) pointer into a shared
+/// pool of indices, rather than being stored directly in this object.
+/// This significantly improves performance because it (1) reduces the
+/// per-element memory footprint, and (2) centralizes the memory
+/// management for indices. The only downside is that the indices
+/// themselves cannot be retrieved without knowing the rank of the
+/// tensor to which this element belongs (and that rank is not stored
+/// in this object).
 template <typename V>
 struct Element final {
   Element(const uint64_t *ind, V val) : indices(ind), value(val){};
@@ -48,11 +55,11 @@
 using ElementConsumer =
     const std::function<void(const std::vector<uint64_t> &, V)> &;

-/// A memory-resident sparse tensor in coordinate scheme (collection of
-/// elements). This data structure is used to read a sparse tensor from
-/// any external format into memory and sort the elements lexicographically
-/// by indices before passing it back to the client (most packed storage
-/// formats require the elements to appear in lexicographic index order).
+/// A memory-resident sparse tensor in coordinate-scheme representation
+/// (a collection of `Element`s). This data structure is used as
+/// an intermediate representation; e.g., for reading sparse tensors
+/// from external formats into memory, or for certain conversions between
+/// different `SparseTensorStorage` formats.
 template <typename V>
 class SparseTensorCOO final {
 public:
@@ -70,6 +77,8 @@
   /// fully permuted coordinate scheme.
   ///
   /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
+  ///
+  /// Asserts: the elements of `dimSizes` are non-zero.
   static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
                                                 const uint64_t *dimSizes,
                                                 const uint64_t *perm,
@@ -85,13 +94,21 @@
   /// Get the rank of the tensor.
   uint64_t getRank() const { return dimSizes.size(); }

-  /// Getter for the dimension-sizes array.
+  /// Get the dimension-sizes array.
   const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

-  /// Getter for the elements array.
+  /// Get the elements array.
   const std::vector<Element<V>> &getElements() const { return elements; }

-  /// Adds element as indices and value.
+  /// Adds an element to the tensor. This method does not check whether
+  /// `ind` is already associated with a value; it adds the element
+  /// regardless. Resolving such conflicts is left up to clients of the
+  /// iterator interface.
+  ///
+  /// Asserts:
+  /// * is not in iterator mode
+  /// * `ind` is valid for `rank`
+  /// * the elements of `ind` are valid for `dimSizes`.
   void add(const std::vector<uint64_t> &ind, V val) {
     assert(!iteratorLocked && "Attempt to add() after startIterator()");
     const uint64_t *base = indices.data();
@@ -117,7 +134,10 @@
     elements.emplace_back(base + size, val);
   }

-  /// Sorts elements lexicographically by index.
+  /// Sorts elements lexicographically by index. If an index is mapped to
+  /// multiple values, then the relative order of those values is unspecified.
+  ///
+  /// Asserts: is not in iterator mode.
   void sort() {
     assert(!iteratorLocked && "Attempt to sort() after startIterator()");
     // TODO: we may want to cache an `isSorted` bit, to avoid
@@ -134,13 +154,17 @@
     });
   }

-  /// Switch into iterator mode.
+  /// Switch into iterator mode. If already in iterator mode, then
+  /// resets the position to the first element.
   void startIterator() {
     iteratorLocked = true;
     iteratorPos = 0;
   }

-  /// Get the next element.
+  /// Get the next element. If there are no remaining elements, then
+  /// returns nullptr and switches out of iterator mode.
+  ///
+  /// Asserts: is in iterator mode.
   const Element<V> *getNext() {
     assert(iteratorLocked && "Attempt to getNext() before startIterator()");
     if (iteratorPos < elements.size())
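(Not part of the patch.) A minimal sketch of how a client might exercise the
`SparseTensorCOO` behavior documented above, assuming a rank-2 tensor whose
dimension sizes admit the index {0,3}; the helper name `demoCOO` is made up
for illustration:

    #include <cstdio>
    #include "mlir/ExecutionEngine/SparseTensor/COO.h"

    // Adds two values for the same index, sorts, then drains the iterator.
    static void demoCOO(mlir::sparse_tensor::SparseTensorCOO<double> *coo) {
      // add() never checks for duplicates; the second call simply appends
      // another element for the same index.
      coo->add({0, 3}, 1.0);
      coo->add({0, 3}, 2.0);
      // sort() orders elements lexicographically by index; the relative
      // order of the two values at {0,3} is unspecified.
      coo->sort();
      coo->startIterator();
      while (const mlir::sparse_tensor::Element<double> *elt = coo->getNext()) {
        // `indices` points into the shared pool; the rank (2 here) must be
        // known by the caller to read the right number of entries.
        std::printf("({%llu,%llu}, %g)\n",
                    static_cast<unsigned long long>(elt->indices[0]),
                    static_cast<unsigned long long>(elt->indices[1]),
                    elt->value);
      }
      // getNext() has returned nullptr, so iterator mode has been exited.
    }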
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h
@@ -29,7 +29,12 @@
 namespace sparse_tensor {
 namespace detail {

-/// A version of `operator*` on `uint64_t` which checks for overflows.
+// TODO: would be better to use various architectures' intrinsics to
+// detect the overflow directly, instead of doing the assertion beforehand
+// (which requires an expensive division).
+//
+/// A version of `operator*` on `uint64_t` which guards against overflows
+/// (when assertions are enabled).
 inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) {
   assert((lhs == 0 || rhs <= std::numeric_limits<uint64_t>::max() / lhs) &&
          "Integer overflow");
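(Not part of the patch.) A sketch of the kind of call site `checkedMul` is
meant for: folding dimension sizes into an element count so that an
overflowing product trips the assertion in debug builds rather than silently
wrapping; the helper name `totalSize` is made up for illustration:

    #include <cstdint>
    #include <vector>
    #include "mlir/ExecutionEngine/SparseTensor/CheckedMul.h"

    // Multiplies all dimension sizes together, one guarded step at a time.
    static uint64_t totalSize(const std::vector<uint64_t> &dimSizes) {
      uint64_t total = 1;
      for (uint64_t sz : dimSizes)
        total = mlir::sparse_tensor::detail::checkedMul(total, sz);
      return total;
    }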
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h
@@ -28,11 +28,12 @@
 #include <cstdio>
 #include <cstdlib>

-// This macro helps minimize repetition of this idiom, as well as ensuring
-// we have some additional output indicating where the error is coming from.
-// (Since `fprintf` doesn't provide a stacktrace, this helps make it easier
-// to track down whether an error is coming from our code vs somewhere else
-// in MLIR.)
+/// This macro helps minimize repetition of the printf-and-exit idiom,
+/// as well as ensuring that we print some additional output indicating
+/// where the error is coming from, to make it easier to determine
+/// whether a particular error is coming from the runtime library's code
+/// or from somewhere else in the MLIR stack (since that can be hard to
+/// determine without the stack traces provided by assertion failures).
 #define MLIR_SPARSETENSOR_FATAL(...)                                          \
   do {                                                                        \
     fprintf(stderr, "SparseTensorUtils: " __VA_ARGS__);                       \
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -45,11 +45,14 @@
 class SparseTensorFile final {
 public:
   enum class ValueKind {
+    // The value before calling `readHeader`.
     kInvalid = 0,
+    // Values that can be set by `readMMEHeader`.
     kPattern = 1,
     kReal = 2,
     kInteger = 3,
     kComplex = 4,
+    // The value set by `readExtFROSTTHeader`.
     kUndefined = 5
   };

@@ -80,6 +83,7 @@

   ValueKind getValueKind() const { return valueKind_; }

+  /// Checks if a header has been successfully read.
   bool isValid() const { return valueKind_ != ValueKind::kInvalid; }

   /// Gets the MME "pattern" property setting. Is only valid after
@@ -125,7 +129,13 @@
   void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;

 private:
+  /// Read the MME header of a general sparse matrix of type real.
   void readMMEHeader();
+
+  /// Read the "extended" FROSTT header. Although not part of the
+  /// documented format, we assume that the file starts with optional
+  /// comments followed by two lines that define the rank, the number of
+  /// nonzeros, and the dimension sizes (one per rank) of the sparse tensor.
   void readExtFROSTTHeader();

   static constexpr int kColWidth = 1025;
@@ -137,6 +147,7 @@
   char line[kColWidth];
 };

+//===----------------------------------------------------------------------===//
 namespace detail {

 // Adds a value to a tensor in coordinate scheme. If is_symmetric_value is true,
@@ -153,8 +164,8 @@
     coo->add({indices[1], indices[0]}, value);
 }

-// Reads an element of a complex type for the current indices in coordinate
-// scheme.
+/// Reads an element of a complex type for the current indices in
+/// coordinate scheme.
 template <typename V>
 inline void readCOOValue(SparseTensorCOO<std::complex<V>> *coo,
                          const std::vector<uint64_t> indices, char **linePtr,
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -87,7 +87,7 @@
   /// Get the rank of the tensor.
   uint64_t getRank() const { return dimSizes.size(); }

-  /// Getter for the dimension-sizes array, in storage-order.
+  /// Get the dimension-sizes array, in storage-order.
   const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

   /// Safely lookup the size of the given (storage-order) dimension.
@@ -96,11 +96,11 @@
     return dimSizes[d];
   }

-  /// Getter for the "reverse" permutation, which maps this object's
+  /// Get the "reverse" permutation, which maps this object's
   /// storage-order to the tensor's semantic-order.
   const std::vector<uint64_t> &getRev() const { return rev; }

-  /// Getter for the dimension-types array, in storage-order.
+  /// Get the dimension-types array, in storage-order.
   const std::vector<DimLevelType> &getDimTypes() const { return dimTypes; }

   /// Safely check if the (storage-order) dimension uses dense storage.
@@ -172,11 +172,13 @@
   FOREVERY_V(DECL_NEWENUMERATOR)
 #undef DECL_NEWENUMERATOR

-  /// Overhead storage.
+  /// Pointers-overhead storage.
 #define DECL_GETPOINTERS(PNAME, P)                                            \
   virtual void getPointers(std::vector<P> **, uint64_t);
   FOREVERY_FIXED_O(DECL_GETPOINTERS)
 #undef DECL_GETPOINTERS
+
+  /// Indices-overhead storage.
 #define DECL_GETINDICES(INAME, I)                                             \
   virtual void getIndices(std::vector<I> **, uint64_t);
   FOREVERY_FIXED_O(DECL_GETINDICES)
@@ -214,11 +216,12 @@
 class SparseTensorEnumerator;

 /// A memory-resident sparse tensor using a storage scheme based on
-/// per-dimension sparse/dense annotations. This data structure provides a
-/// bufferized form of a sparse tensor type. In contrast to generating setup
-/// methods for each differently annotated sparse tensor, this method provides
-/// a convenient "one-size-fits-all" solution that simply takes an input tensor
-/// and annotations to implement all required setup in a general manner.
+/// per-dimension sparse/dense annotations. This data structure provides
+/// a bufferized form of a sparse tensor type. In contrast to generating
+/// setup methods for each differently annotated sparse tensor, this
+/// method provides a convenient "one-size-fits-all" solution that simply
+/// takes an input tensor and annotations to implement all required setup
+/// in a general manner.
 template <typename P, typename I, typename V>
 class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Private constructor to share code between the other constructors.
@@ -340,6 +343,9 @@
     endPath(0);
   }

+  /// Allocate a new enumerator for this class's `<P,I,V>` types and
+  /// erase the `<P,I>` parts from the type. Callers must make sure to
+  /// delete the enumerator when they're done with it.
   void newEnumerator(SparseTensorEnumeratorBase<V> **out, uint64_t rank,
                      const uint64_t *perm) const final {
     *out = new SparseTensorEnumerator<P, I, V>(*this, rank, perm);
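(Not part of the patch.) The caller-owns contract documented for
`newEnumerator` above, spelled out as a sketch; it assumes the storage object
was built for `double` values, and the helper name `enumerateValues` is made
up for illustration:

    #include <cstdint>
    #include "mlir/ExecutionEngine/SparseTensor/Storage.h"

    // Allocates an enumerator through the virtual hook, uses it, and
    // deletes it before returning; nothing here outlives the call.
    static void enumerateValues(
        const mlir::sparse_tensor::SparseTensorStorageBase &tensor,
        uint64_t rank, const uint64_t *perm) {
      mlir::sparse_tensor::SparseTensorEnumeratorBase<double> *enumerator =
          nullptr;
      tensor.newEnumerator(&enumerator, rank, perm);
      // ... drive the enumeration here ...
      delete enumerator; // callers must delete the enumerator when done
    }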
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp
@@ -83,7 +83,7 @@
          "Dimension size mismatch");
 }

-/// Helper to convert string to lower case.
+/// Helper to convert C-style strings (i.e., '\0' terminated) to lower case.
 static inline char *toLower(char *token) {
   for (char *c = token; *c; ++c)
     *c = tolower(*c);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
@@ -20,6 +20,13 @@

 using namespace mlir::sparse_tensor;

+//===----------------------------------------------------------------------===//
+/// Allocate the statistics structure for the desired sizes and
+/// sparsity (in the target tensor's storage-order). This constructor
+/// does not actually populate the statistics, however; for that see
+/// `initialize`.
+///
+/// Precondition: `dimSizes` must not contain zeros.
 SparseTensorNNZ::SparseTensorNNZ(const std::vector<uint64_t> &dimSizes,
                                  const std::vector<DimLevelType> &sparsity)
     : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) {
@@ -51,6 +58,10 @@
   }
 }

+/// Lexicographically enumerates all indices for dimensions strictly
+/// less than `stopDim`, and passes their nnz statistic to the callback.
+/// Since our use-case only requires the statistic, not the coordinates
+/// themselves, we do not bother to construct those coordinates.
 void SparseTensorNNZ::forallIndices(uint64_t stopDim,
                                     SparseTensorNNZ::NNZConsumer yield) const {
   assert(stopDim < getRank() && "Dimension out of bounds");
@@ -59,6 +70,11 @@
   forallIndices(yield, stopDim, 0, 0);
 }

+/// Adds a new element (i.e., increments its statistics). We use
+/// a method rather than inlining into the lambda in `initialize`,
+/// to avoid spurious templating over `V`. This method is private
+/// to avoid needing to re-assert the validity of `ind` (which is
+/// guaranteed by `forallElements`).
 void SparseTensorNNZ::add(const std::vector<uint64_t> &ind) {
   uint64_t parentPos = 0;
   for (uint64_t rank = getRank(), r = 0; r < rank; ++r) {
@@ -68,6 +84,7 @@
   }
 }

+/// Recursive component of the public `forallIndices`.
 void SparseTensorNNZ::forallIndices(SparseTensorNNZ::NNZConsumer yield,
                                     uint64_t stopDim, uint64_t parentPos,
                                     uint64_t d) const {
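(Not part of the patch.) A sketch of a caller of the public `forallIndices`
documented above; it assumes `NNZConsumer` accepts a callable taking the
per-index nnz count as a single `uint64_t` (its definition is not shown in
this patch), and the helper name `totalEntries` is made up for illustration:

    // Adds up the nnz statistics yielded for all indices of the dimensions
    // strictly less than `stopDim`.
    static uint64_t
    totalEntries(const mlir::sparse_tensor::SparseTensorNNZ &stats,
                 uint64_t stopDim) {
      uint64_t total = 0;
      stats.forallIndices(stopDim, [&total](uint64_t n) { total += n; });
      return total;
    }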
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a light-weight runtime support library that is useful
-// for sparse tensor manipulations. The functionality provided in this library
-// is meant to simplify benchmarking, testing, and debugging MLIR code that
-// operates on sparse tensors. The provided functionality is **not** part
-// of core MLIR, however.
+// This file implements a light-weight runtime support library for
+// manipulating sparse tensors from MLIR. More specifically, it provides
+// C-API wrappers so that MLIR-generated code can call into the C++ runtime
+// support library. The functionality provided in this library is meant
+// to simplify benchmarking, testing, and debugging of MLIR code operating
+// on sparse tensors. However, the provided functionality is **not**
+// part of core MLIR itself.
 //
 // The following memory-resident sparse storage schemes are supported:
 //
@@ -57,9 +59,18 @@

 using namespace mlir::sparse_tensor;

+//===----------------------------------------------------------------------===//
+//
+// Implementation details for public functions, which don't have a good
+// place to live in the C++ library this file is wrapping.
+//
+//===----------------------------------------------------------------------===//
+
 namespace {

 /// Initializes sparse tensor from an external COO-flavored format.
+/// Used by `IMPL_CONVERTTOMLIRSPARSETENSOR`.
+// TODO: generalize beyond 64-bit indices.
 template <typename V>
 static SparseTensorStorage<uint64_t, uint64_t, V> *
 toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *shape,
@@ -98,6 +109,14 @@
 }

 /// Converts a sparse tensor to an external COO-flavored format.
+/// Used by `IMPL_CONVERTFROMMLIRSPARSETENSOR`.
+//
+// TODO: Currently, values are copied from SparseTensorStorage to
+// SparseTensorCOO, then to the output. We may want to reduce the number
+// of copies.
+//
+// TODO: generalize beyond 64-bit indices, no dim ordering, all dimensions
+// compressed
 template <typename V>
 static void
 fromMLIRSparseTensor(const SparseTensorStorage<uint64_t, uint64_t, V> *tensor,
@@ -490,7 +509,6 @@
 }

 // We can't use `static_cast` here because `DimLevelType` is an enum-class.
-// TODO: generalize beyond 64-bit indices.
 #define IMPL_CONVERTTOMLIRSPARSETENSOR(VNAME, V)                              \
   void *convertToMLIRSparseTensor##VNAME(                                     \
       uint64_t rank, uint64_t nse, uint64_t *shape, V *values,                \
@@ -501,12 +519,6 @@
 FOREVERY_V(IMPL_CONVERTTOMLIRSPARSETENSOR)
 #undef IMPL_CONVERTTOMLIRSPARSETENSOR

-// TODO: Currently, values are copied from SparseTensorStorage to
-// SparseTensorCOO, then to the output. We may want to reduce the number
-// of copies.
-//
-// TODO: generalize beyond 64-bit indices, no dim ordering, all dimensions
-// compressed
 #define IMPL_CONVERTFROMMLIRSPARSETENSOR(VNAME, V)                            \
   void convertFromMLIRSparseTensor##VNAME(void *tensor, uint64_t *pRank,      \
                                           uint64_t *pNse, uint64_t **pShape,  \