diff --git a/mlir/include/mlir/ExecutionEngine/Float16bits.h b/mlir/include/mlir/ExecutionEngine/Float16bits.h --- a/mlir/include/mlir/ExecutionEngine/Float16bits.h +++ b/mlir/include/mlir/ExecutionEngine/Float16bits.h @@ -17,23 +17,36 @@ #include #include +#ifdef _WIN32 +#ifdef mlir_float16_utils_EXPORTS // We are building this library +#define MLIR_FLOAT16_EXPORT __declspec(dllexport) +#define MLIR_FLOAT16_DEFINE_FUNCTIONS +#else // We are using this library +#define MLIR_FLOAT16_EXPORT __declspec(dllimport) +#endif // mlir_float16_utils_EXPORTS +#else // Non-windows: use visibility attributes. +#define MLIR_FLOAT16_EXPORT __attribute__((visibility("default"))) +#define MLIR_FLOAT16_DEFINE_FUNCTIONS +#endif // _WIN32 + // Implements half precision and bfloat with f16 and bf16, using the MLIR type // names. These data types are also used for c-interface runtime routines. extern "C" { -struct f16 { +struct MLIR_FLOAT16_EXPORT f16 { f16(float f = 0); uint16_t bits; }; -struct bf16 { +struct MLIR_FLOAT16_EXPORT bf16 { bf16(float f = 0); uint16_t bits; }; } // Outputs a half precision value. -std::ostream &operator<<(std::ostream &os, const f16 &f); +MLIR_FLOAT16_EXPORT std::ostream &operator<<(std::ostream &os, const f16 &f); // Outputs a bfloat value. -std::ostream &operator<<(std::ostream &os, const bf16 &d); +MLIR_FLOAT16_EXPORT std::ostream &operator<<(std::ostream &os, const bf16 &d); +#undef MLIR_FLOAT16_EXPORT #endif // MLIR_EXECUTIONENGINE_FLOAT16BITS_H_ diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h @@ -0,0 +1,163 @@ +//===- COO.h - Coordinate-scheme sparse tensor representation ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H + +#include +#include +#include +#include +#include + +namespace mlir { +namespace sparse_tensor { + +/// A sparse tensor element in coordinate scheme (value and indices). +/// For example, a rank-1 vector element would look like +/// ({i}, a[i]) +/// and a rank-5 tensor element like +/// ({i,j,k,l,m}, a[i,j,k,l,m]) +/// We use pointer to a shared index pool rather than e.g. a direct +/// vector since that (1) reduces the per-element memory footprint, and +/// (2) centralizes the memory reservation and (re)allocation to one place. +template +struct Element final { + Element(uint64_t *ind, V val) : indices(ind), value(val){}; + uint64_t *indices; // pointer into shared index pool + V value; +}; + +/// The type of callback functions which receive an element. We avoid +/// packaging the coordinates and value together as an `Element` object +/// because this helps keep code somewhat cleaner. 
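/// For illustration only (not part of this patch), any callable with the
/// matching signature converts to the `ElementConsumer` alias defined just
/// below; e.g. a callback that prints each element of a tensor of doubles:
///
///   auto dump = [](const std::vector<uint64_t> &ind, double val) {
///     for (uint64_t i : ind)
///       std::cout << i << " ";
///     std::cout << "-> " << val << "\n";
///   };
///   // `dump` converts to ElementConsumer<double> at the call site.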
+template +using ElementConsumer = + const std::function &, V)> &; + +/// A memory-resident sparse tensor in coordinate scheme (collection of +/// elements). This data structure is used to read a sparse tensor from +/// any external format into memory and sort the elements lexicographically +/// by indices before passing it back to the client (most packed storage +/// formats require the elements to appear in lexicographic index order). +template +struct SparseTensorCOO final { +public: + SparseTensorCOO(const std::vector &dimSizes, uint64_t capacity) + : dimSizes(dimSizes) { + if (capacity) { + elements.reserve(capacity); + indices.reserve(capacity * getRank()); + } + } + + /// Adds element as indices and value. + void add(const std::vector &ind, V val) { + assert(!iteratorLocked && "Attempt to add() after startIterator()"); + uint64_t *base = indices.data(); + uint64_t size = indices.size(); + uint64_t rank = getRank(); + assert(ind.size() == rank && "Element rank mismatch"); + for (uint64_t r = 0; r < rank; r++) { + assert(ind[r] < dimSizes[r] && "Index is too large for the dimension"); + indices.push_back(ind[r]); + } + // This base only changes if indices were reallocated. In that case, we + // need to correct all previous pointers into the vector. Note that this + // only happens if we did not set the initial capacity right, and then only + // for every internal vector reallocation (which with the doubling rule + // should only incur an amortized linear overhead). + uint64_t *newBase = indices.data(); + if (newBase != base) { + for (uint64_t i = 0, n = elements.size(); i < n; i++) + elements[i].indices = newBase + (elements[i].indices - base); + base = newBase; + } + // Add element as (pointer into shared index pool, value) pair. + elements.emplace_back(base + size, val); + } + + /// Sorts elements lexicographically by index. + void sort() { + assert(!iteratorLocked && "Attempt to sort() after startIterator()"); + // TODO: we may want to cache an `isSorted` bit, to avoid + // unnecessary/redundant sorting. + uint64_t rank = getRank(); + std::sort(elements.begin(), elements.end(), + [rank](const Element &e1, const Element &e2) { + for (uint64_t r = 0; r < rank; r++) { + if (e1.indices[r] == e2.indices[r]) + continue; + return e1.indices[r] < e2.indices[r]; + } + return false; + }); + } + + /// Get the rank of the tensor. + uint64_t getRank() const { return dimSizes.size(); } + + /// Getter for the dimension-sizes array. + const std::vector &getDimSizes() const { return dimSizes; } + + /// Getter for the elements array. + const std::vector> &getElements() const { return elements; } + + /// Switch into iterator mode. + void startIterator() { + iteratorLocked = true; + iteratorPos = 0; + } + + /// Get the next element. + const Element *getNext() { + assert(iteratorLocked && "Attempt to getNext() before startIterator()"); + if (iteratorPos < elements.size()) + return &(elements[iteratorPos++]); + iteratorLocked = false; + return nullptr; + } + + /// Factory method. Permutes the original dimensions according to + /// the given ordering and expects subsequent add() calls to honor + /// that same ordering for the given indices. The result is a + /// fully permuted coordinate scheme. + /// + /// Precondition: `dimSizes` and `perm` must be valid for `rank`. 
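  /// For illustration only (not part of this patch), a minimal usage sketch
  /// of the factory defined just below together with add()/sort()/getNext();
  /// the 2x3 shape, indices, and values are made up for the example:
  ///
  ///   const uint64_t dimSizes[2] = {2, 3};
  ///   const uint64_t perm[2] = {0, 1}; // identity permutation
  ///   auto *coo = SparseTensorCOO<double>::newSparseTensorCOO(
  ///       2, dimSizes, perm, /*capacity=*/2);
  ///   coo->add({1, 2}, 3.0);
  ///   coo->add({0, 1}, 1.5);
  ///   coo->sort(); // now in lexicographic index order
  ///   coo->startIterator();
  ///   while (const Element<double> *e = coo->getNext())
  ///     std::cout << e->indices[0] << "," << e->indices[1]
  ///               << " = " << e->value << "\n";
  ///   delete coo;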
+ static SparseTensorCOO *newSparseTensorCOO(uint64_t rank, + const uint64_t *dimSizes, + const uint64_t *perm, + uint64_t capacity = 0) { + std::vector permsz(rank); + for (uint64_t r = 0; r < rank; r++) { + assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); + permsz[perm[r]] = dimSizes[r]; + } + return new SparseTensorCOO(permsz, capacity); + } + +private: + const std::vector dimSizes; // per-dimension sizes + std::vector> elements; // all COO elements + std::vector indices; // shared index pool + bool iteratorLocked = false; + unsigned iteratorPos = 0; +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h @@ -0,0 +1,43 @@ +//===- CheckedMul.h - multiplication that checks for overflow ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header is not part of the public API. It is placed in the +// includes directory only because that's required by the implementations +// of template-classes. +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_CHECKEDMUL_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_CHECKEDMUL_H + +#include +#include +#include + +namespace mlir { +namespace sparse_tensor { +namespace detail { + +/// A version of `operator*` on `uint64_t` which checks for overflows. +inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) { + assert((lhs == 0 || rhs <= std::numeric_limits::max() / lhs) && + "Integer overflow"); + return lhs * rhs; +} + +} // namespace detail +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_CHECKEDMUL_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Enums.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Enums.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Enums.h @@ -0,0 +1,163 @@ +//===- Enums.h - Enums shared with the runtime ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Typedefs and enums for the lightweight runtime support library for +// sparse tensor manipulations. These are required to be public so that +// they can be shared with `Transforms/SparseTensorConversion.cpp`, since +// they define the arguments to the public functions declared later on. 
+// +// This file also defines x-macros +// so that we can generate variations of the public functions for each +// supported primary- and/or overhead-type. +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_ENUMS_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_ENUMS_H + +#include "mlir/ExecutionEngine/Float16bits.h" + +#include +#include + +#ifdef _WIN32 +#ifdef mlir_sparse_tensor_utils_EXPORTS // We are building this library +#define MLIR_SPARSETENSOR_EXPORT __declspec(dllexport) +#define MLIR_SPARSETENSOR_DEFINE_FUNCTIONS +#else // We are using this library +#define MLIR_SPARSETENSOR_EXPORT __declspec(dllimport) +#endif // mlir_sparse_tensor_utils_EXPORTS +#else // Non-windows: use visibility attributes. +#define MLIR_SPARSETENSOR_EXPORT __attribute__((visibility("default"))) +#define MLIR_SPARSETENSOR_DEFINE_FUNCTIONS +#endif // _WIN32 + +namespace mlir { +namespace sparse_tensor { + +/// This type is used in the public API at all places where MLIR expects +/// values with the built-in type "index". For now, we simply assume that +/// type is 64-bit, but targets with different "index" bit widths should +/// link with an alternatively built runtime support library. +// TODO: support such targets? +using index_type = uint64_t; + +/// Encoding of overhead types (both pointer overhead and indices +/// overhead), for "overloading" @newSparseTensor. +enum class MLIR_SPARSETENSOR_EXPORT OverheadType : uint32_t { + kIndex = 0, + kU64 = 1, + kU32 = 2, + kU16 = 3, + kU8 = 4 +}; + +// This x-macro calls its argument on every overhead type which has +// fixed-width. It excludes `index_type` because that type is often +// handled specially (e.g., by translating it into the architecture-dependent +// equivalent fixed-width overhead type). +#define FOREVERY_FIXED_O(DO) \ + DO(64, uint64_t) \ + DO(32, uint32_t) \ + DO(16, uint16_t) \ + DO(8, uint8_t) + +// This x-macro calls its argument on every overhead type, including +// `index_type`. +#define FOREVERY_O(DO) \ + FOREVERY_FIXED_O(DO) \ + DO(0, index_type) + +// These are not just shorthands but indicate the particular +// implementation used (e.g., as opposed to C99's `complex double`, +// or MLIR's `ComplexType`). +using complex64 = std::complex; +using complex32 = std::complex; + +/// Encoding of the elemental type, for "overloading" @newSparseTensor. +enum class MLIR_SPARSETENSOR_EXPORT PrimaryType : uint32_t { + kF64 = 1, + kF32 = 2, + kF16 = 3, + kBF16 = 4, + kI64 = 5, + kI32 = 6, + kI16 = 7, + kI8 = 8, + kC64 = 9, + kC32 = 10 +}; + +// This x-macro includes all `V` types. 
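// As an illustration only (not part of this patch), a client typically
// instantiates the FOREVERY_V x-macro defined just below like this, where
// the function name is hypothetical:
//
//   #define DECL_SPARSEVALUES(VNAME, V)                                      \
//     void readSparseTensorValues##VNAME(std::vector<V> **out, void *tensor);
//   FOREVERY_V(DECL_SPARSEVALUES)
//   #undef DECL_SPARSEVALUES
//
// which declares one readSparseTensorValues* overload per value type
// (F64, F32, F16, BF16, I64, ..., C32).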
+#define FOREVERY_V(DO) \ + DO(F64, double) \ + DO(F32, float) \ + DO(F16, f16) \ + DO(BF16, bf16) \ + DO(I64, int64_t) \ + DO(I32, int32_t) \ + DO(I16, int16_t) \ + DO(I8, int8_t) \ + DO(C64, complex64) \ + DO(C32, complex32) + +constexpr MLIR_SPARSETENSOR_EXPORT bool +isFloatingPrimaryType(PrimaryType valTy) { + return PrimaryType::kF64 <= valTy && valTy <= PrimaryType::kBF16; +} + +constexpr MLIR_SPARSETENSOR_EXPORT bool +isIntegralPrimaryType(PrimaryType valTy) { + return PrimaryType::kI64 <= valTy && valTy <= PrimaryType::kI8; +} + +constexpr MLIR_SPARSETENSOR_EXPORT bool isRealPrimaryType(PrimaryType valTy) { + return PrimaryType::kF64 <= valTy && valTy <= PrimaryType::kI8; +} + +constexpr MLIR_SPARSETENSOR_EXPORT bool +isComplexPrimaryType(PrimaryType valTy) { + return PrimaryType::kC64 <= valTy && valTy <= PrimaryType::kC32; +} + +/// The actions performed by @newSparseTensor. +enum class MLIR_SPARSETENSOR_EXPORT Action : uint32_t { + kEmpty = 0, + kFromFile = 1, + kFromCOO = 2, + kSparseToSparse = 3, + kEmptyCOO = 4, + kToCOO = 5, + kToIterator = 6, +}; + +/// This enum mimics `SparseTensorEncodingAttr::DimLevelType` for +/// breaking dependency cycles. `SparseTensorEncodingAttr::DimLevelType` +/// is the source of truth and this enum should be kept consistent with it. +enum class MLIR_SPARSETENSOR_EXPORT DimLevelType : uint8_t { + kDense = 0, + kCompressed = 1, + kCompressedNu = 2, + kCompressedNo = 3, + kCompressedNuNo = 4, + kSingleton = 5, + kSingletonNu = 6, + kSingletonNo = 7, + kSingletonNuNo = 8, +}; + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_ENUMS_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h @@ -0,0 +1,42 @@ +//===- ErrorHandling.h - Helpers for errors ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header is not part of the public API. It is placed in the +// includes directory only because that's required by the implementations +// of template-classes. +// +// This file defines an extremely lightweight API for fatal errors (not +// arising from assertions). The API does not attempt to be sophisticated +// in any way, it's just the usual "I give up" style of error reporting. +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_ERRORHANDLING_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_ERRORHANDLING_H + +#include +#include + +// This macro helps minimize repetition of this idiom, as well as ensuring +// we have some additional output indicating where the error is coming from. 
+// (Since `fprintf` doesn't provide a stacktrace, this helps make it easier +// to track down whether an error is coming from our code vs somewhere else +// in MLIR.) +#define MLIR_SPARSETENSOR_FATAL(...) \ + do { \ + fprintf(stderr, "SparseTensorUtils: " __VA_ARGS__); \ + exit(1); \ + } while (0) + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_ERRORHANDLING_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -0,0 +1,259 @@ +//===- File.h - Parsing sparse tensors from files ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements parsing and printing of files in one of the +// following external formats: +// +// (1) Matrix Market Exchange (MME): *.mtx +// https://math.nist.gov/MatrixMarket/formats.html +// +// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns +// http://frostt.io/tensors/file-formats.html +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H + +#include "mlir/ExecutionEngine/SparseTensor/Storage.h" + +#include + +namespace mlir { +namespace sparse_tensor { + +/// This class abstracts over the information stored in file headers, +/// as well as providing the buffers and methods for parsing those headers. +class SparseTensorFile final { +public: + enum class ValueKind { + kInvalid = 0, + kPattern = 1, + kReal = 2, + kInteger = 3, + kComplex = 4, + kUndefined = 5 + }; + + explicit SparseTensorFile(char *filename) : filename(filename) { + assert(filename && "Received nullptr for filename"); + } + + // Disallows copying, to avoid duplicating the `file` pointer. + SparseTensorFile(const SparseTensorFile &) = delete; + SparseTensorFile &operator=(const SparseTensorFile &) = delete; + + // This dtor tries to avoid leaking the `file`. (Though it's better + // to call `closeFile` explicitly when possible, since there are + // circumstances where dtors are not called reliably.) + ~SparseTensorFile() { closeFile(); } + + /// Opens the file for reading. + void openFile(); + + /// Closes the file. + void closeFile(); + + /// Attempts to read a line from the file. + char *readLine(); + + /// Reads and parses the file's header. + void readHeader(); + + ValueKind getValueKind() const { return valueKind_; } + + bool isValid() const { return valueKind_ != ValueKind::kInvalid; } + + /// Gets the MME "pattern" property setting. Is only valid after + /// parsing the header. + bool isPattern() const { + assert(isValid() && "Attempt to isPattern() before readHeader()"); + return valueKind_ == ValueKind::kPattern; + } + + /// Gets the MME "symmetric" property setting. Is only valid after + /// parsing the header. 
+ bool isSymmetric() const { + assert(isValid() && "Attempt to isSymmetric() before readHeader()"); + return isSymmetric_; + } + + /// Gets the rank of the tensor. Is only valid after parsing the header. + uint64_t getRank() const { + assert(isValid() && "Attempt to getRank() before readHeader()"); + return idata[0]; + } + + /// Gets the number of non-zeros. Is only valid after parsing the header. + uint64_t getNNZ() const { + assert(isValid() && "Attempt to getNNZ() before readHeader()"); + return idata[1]; + } + + /// Gets the dimension-sizes array. The pointer itself is always + /// valid; however, the values stored therein are only valid after + /// parsing the header. + const uint64_t *getDimSizes() const { return idata + 2; } + + /// Safely gets the size of the given dimension. Is only valid + /// after parsing the header. + uint64_t getDimSize(uint64_t d) const { + assert(d < getRank()); + return idata[2 + d]; + } + + /// Asserts the shape subsumes the actual dimension sizes. Is only + /// valid after parsing the header. + void assertMatchesShape(uint64_t rank, const uint64_t *shape) const; + +private: + void readMMEHeader(); + void readExtFROSTTHeader(); + + static constexpr int kColWidth = 1025; + const char *filename; + FILE *file = nullptr; + ValueKind valueKind_ = ValueKind::kInvalid; + bool isSymmetric_ = false; + uint64_t idata[512]; + char line[kColWidth]; +}; + +namespace detail { + +// Adds a value to a tensor in coordinate scheme. If is_symmetric_value is true, +// also adds the value to its symmetric location. +template +inline void addValue(T *coo, V value, const std::vector indices, + bool is_symmetric_value) { + // TODO: + coo->add(indices, value); + // We currently chose to deal with symmetric matrices by fully constructing + // them. In the future, we may want to make symmetry implicit for storage + // reasons. + if (is_symmetric_value) + coo->add({indices[1], indices[0]}, value); +} + +// Reads an element of a complex type for the current indices in coordinate +// scheme. +template +inline void readCOOValue(SparseTensorCOO> *coo, + const std::vector indices, char **linePtr, + bool is_pattern, bool add_symmetric_value) { + // Read two values to make a complex. The external formats always store + // numerical values with the type double, but we cast these values to the + // sparse tensor object type. For a pattern tensor, we arbitrarily pick the + // value 1 for all entries. + V re = is_pattern ? 1.0 : strtod(*linePtr, linePtr); + V im = is_pattern ? 1.0 : strtod(*linePtr, linePtr); + std::complex value = {re, im}; + addValue(coo, value, indices, add_symmetric_value); +} + +// Reads an element of a non-complex type for the current indices in coordinate +// scheme. +template , V>::value && + !std::is_same, V>::value>::type * = nullptr> +inline void readCOOValue(SparseTensorCOO *coo, + const std::vector indices, char **linePtr, + bool is_pattern, bool is_symmetric_value) { + // The external formats always store these numerical values with the type + // double, but we cast these values to the sparse tensor object type. + // For a pattern tensor, we arbitrarily pick the value 1 for all entries. + double value = is_pattern ? 1.0 : strtod(*linePtr, linePtr); + addValue(coo, value, indices, is_symmetric_value); +} + +} // namespace detail + +/// Reads a sparse tensor with the given filename into a memory-resident +/// sparse tensor in coordinate scheme. 
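/// For illustration only (not part of this patch), a sketch of calling the
/// function defined just below; the file name "example.mtx" is hypothetical,
/// and a shape entry of 0 accepts whatever size the file header declares:
///
///   char filename[] = "example.mtx";
///   uint64_t shape[2] = {0, 0};
///   uint64_t perm[2] = {0, 1}; // identity permutation
///   SparseTensorCOO<double> *coo = openSparseTensorCOO<double>(
///       filename, /*rank=*/2, shape, perm, PrimaryType::kF64);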
+template +inline SparseTensorCOO * +openSparseTensorCOO(char *filename, uint64_t rank, const uint64_t *shape, + const uint64_t *perm, PrimaryType valTp) { + SparseTensorFile stfile(filename); + stfile.openFile(); + stfile.readHeader(); + // Check tensor element type against the value type in the input file. + SparseTensorFile::ValueKind valueKind = stfile.getValueKind(); + bool tensorIsInteger = + (valTp >= PrimaryType::kI64 && valTp <= PrimaryType::kI8); + bool tensorIsReal = (valTp >= PrimaryType::kF64 && valTp <= PrimaryType::kI8); + if ((valueKind == SparseTensorFile::ValueKind::kReal && tensorIsInteger) || + (valueKind == SparseTensorFile::ValueKind::kComplex && tensorIsReal)) { + MLIR_SPARSETENSOR_FATAL( + "Tensor element type %d not compatible with values in file %s\n", + static_cast(valTp), filename); + } + stfile.assertMatchesShape(rank, shape); + // Prepare sparse tensor object with per-dimension sizes + // and the number of nonzeros as initial capacity. + uint64_t nnz = stfile.getNNZ(); + auto *coo = SparseTensorCOO::newSparseTensorCOO(rank, stfile.getDimSizes(), + perm, nnz); + // Read all nonzero elements. + std::vector indices(rank); + for (uint64_t k = 0; k < nnz; k++) { + char *linePtr = stfile.readLine(); + for (uint64_t r = 0; r < rank; r++) { + uint64_t idx = strtoul(linePtr, &linePtr, 10); + // Add 0-based index. + indices[perm[r]] = idx - 1; + } + detail::readCOOValue(coo, indices, &linePtr, stfile.isPattern(), + stfile.isSymmetric() && indices[0] != indices[1]); + } + // Close the file and return tensor. + stfile.closeFile(); + return coo; +} + +/// Writes the sparse tensor to `dest` in extended FROSTT format. +template +inline void outSparseTensor(void *tensor, void *dest, bool sort) { + assert(tensor && dest); + auto coo = static_cast *>(tensor); + if (sort) + coo->sort(); + char *filename = static_cast(dest); + auto &dimSizes = coo->getDimSizes(); + auto &elements = coo->getElements(); + uint64_t rank = coo->getRank(); + uint64_t nnz = elements.size(); + std::fstream file; + file.open(filename, std::ios_base::out | std::ios_base::trunc); + assert(file.is_open()); + file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl; + for (uint64_t r = 0; r < rank - 1; r++) + file << dimSizes[r] << " "; + file << dimSizes[rank - 1] << std::endl; + for (uint64_t i = 0; i < nnz; i++) { + auto &idx = elements[i].indices; + for (uint64_t r = 0; r < rank; r++) + file << (idx[r] + 1) << " "; + file << elements[i].value << std::endl; + } + file.flush(); + file.close(); + assert(file.good()); +} + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -0,0 +1,917 @@ +//===- Storage.h - TACO-flavored sparse tensor representation ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. 
However, the provided functionality is **not** part of +// core MLIR itself. +// +// This file contains definitions for the following classes: +// +// * `SparseTensorStorageBase` +// * `SparseTensorStorage` +// * `SparseTensorEnumeratorBase` +// * `SparseTensorEnumerator` +// * `SparseTensorNNZ` +// +// Ideally we would split the storage classes and enumerator classes +// into separate files, to improve legibility. But alas: because these +// are template-classes, they must therefore provide *definitions* in the +// header; and those definitions cause circular dependencies that make it +// impossible to split the file up along the desired lines. (We could +// split the base classes from the derived classes, but that doesn't +// particularly help improve legibility.) +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H + +#include "mlir/ExecutionEngine/SparseTensor/COO.h" +#include "mlir/ExecutionEngine/SparseTensor/CheckedMul.h" +#include "mlir/ExecutionEngine/SparseTensor/Enums.h" +#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" + +namespace mlir { +namespace sparse_tensor { + +namespace detail { + +// TODO: try to unify this with `SparseTensorFile::assertMatchesShape` +// which is used by `openSparseTensorCOO`. It's easy enough to resolve +// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier +// to resolve the presence/absence of `perm` (without introducing extra +// overhead), so perhaps the code duplication is unavoidable. +// +/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping +/// semantic-order to target-order) are a refinement of the desired `shape` +/// (in semantic-order). +/// +/// Precondition: `perm` and `shape` must be valid for `rank`. +inline void assertPermutedSizesMatchShape(const std::vector &dimSizes, + uint64_t rank, const uint64_t *perm, + const uint64_t *shape) { + assert(perm && shape); + assert(rank == dimSizes.size() && "Rank mismatch"); + for (uint64_t r = 0; r < rank; r++) + assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) && + "Dimension size mismatch"); +} + +} // namespace detail + +// This forward decl is sufficient to split `SparseTensorStorageBase` into +// its own header, but isn't sufficient for `SparseTensorStorage` to join it. +template +class SparseTensorEnumeratorBase; + +/// Abstract base class for `SparseTensorStorage`. This class +/// takes responsibility for all the ``-independent aspects +/// of the tensor (e.g., shape, sparsity, permutation). In addition, +/// we use function overloading to implement "partial" method +/// specialization, which the C-API relies on to catch type errors +/// arising from our use of opaque pointers. +class SparseTensorStorageBase { +public: + /// Constructs a new storage object. The `perm` maps the tensor's + /// semantic-ordering of dimensions to this object's storage-order. + /// The `dimSizes` and `sparsity` arrays are already in storage-order. + /// + /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. + SparseTensorStorageBase(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity); + + virtual ~SparseTensorStorageBase() = default; + + /// Get the rank of the tensor. + uint64_t getRank() const { return dimSizes.size(); } + + /// Getter for the dimension-sizes array, in storage-order. 
+ const std::vector &getDimSizes() const { return dimSizes; } + + /// Safely lookup the size of the given (storage-order) dimension. + uint64_t getDimSize(uint64_t d) const { + assert(d < getRank()); + return dimSizes[d]; + } + + /// Getter for the "reverse" permutation, which maps this object's + /// storage-order to the tensor's semantic-order. + const std::vector &getRev() const { return rev; } + + /// Getter for the dimension-types array, in storage-order. + const std::vector &getDimTypes() const { return dimTypes; } + + /// Safely check if the (storage-order) dimension uses dense storage. + bool isDenseDim(uint64_t d) const { + assert(d < getRank()); + return dimTypes[d] == DimLevelType::kDense; + } + + /// Safely check if the (storage-order) dimension uses compressed storage. + bool isCompressedDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kCompressed: + case DimLevelType::kCompressedNu: + case DimLevelType::kCompressedNo: + case DimLevelType::kCompressedNuNo: + return true; + default: + return false; + } + } + + /// Safely check if the (storage-order) dimension uses singleton storage. + bool isSingletonDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kSingleton: + case DimLevelType::kSingletonNu: + case DimLevelType::kSingletonNo: + case DimLevelType::kSingletonNuNo: + return true; + default: + return false; + } + } + + /// Safely check if the (storage-order) dimension is ordered. + bool isOrderedDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kCompressedNo: + case DimLevelType::kCompressedNuNo: + case DimLevelType::kSingletonNo: + case DimLevelType::kSingletonNuNo: + return false; + default: + return true; + } + } + + /// Safely check if the (storage-order) dimension is unique. + bool isUniqueDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kCompressedNu: + case DimLevelType::kCompressedNuNo: + case DimLevelType::kSingletonNu: + case DimLevelType::kSingletonNuNo: + return false; + default: + return true; + } + } + + /// Allocate a new enumerator. +#define DECL_NEWENUMERATOR(VNAME, V) \ + virtual void newEnumerator(SparseTensorEnumeratorBase **, uint64_t, \ + const uint64_t *) const; + FOREVERY_V(DECL_NEWENUMERATOR) +#undef DECL_NEWENUMERATOR + + /// Overhead storage. +#define DECL_GETPOINTERS(PNAME, P) \ + virtual void getPointers(std::vector
<P>
**, uint64_t); + FOREVERY_FIXED_O(DECL_GETPOINTERS) +#undef DECL_GETPOINTERS +#define DECL_GETINDICES(INAME, I) \ + virtual void getIndices(std::vector **, uint64_t); + FOREVERY_FIXED_O(DECL_GETINDICES) +#undef DECL_GETINDICES + + /// Primary storage. +#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector **); + FOREVERY_V(DECL_GETVALUES) +#undef DECL_GETVALUES + + /// Element-wise insertion in lexicographic index order. +#define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V); + FOREVERY_V(DECL_LEXINSERT) +#undef DECL_LEXINSERT + + /// Expanded insertion. +#define DECL_EXPINSERT(VNAME, V) \ + virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t); + FOREVERY_V(DECL_EXPINSERT) +#undef DECL_EXPINSERT + + /// Finishes insertion. + virtual void endInsert() = 0; + +protected: + // Since this class is virtual, we must disallow public copying in + // order to avoid "slicing". Since this class has data members, + // that means making copying protected. + // + SparseTensorStorageBase(const SparseTensorStorageBase &) = default; + // Copy-assignment would be implicitly deleted (because `dimSizes` + // is const), so we explicitly delete it for clarity. + SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete; + +private: + const std::vector dimSizes; + std::vector rev; + const std::vector dimTypes; +}; + +// This forward decl is necessary for defining `SparseTensorStorage`, +// but isn't sufficient for splitting it off. +template +class SparseTensorEnumerator; + +/// A memory-resident sparse tensor using a storage scheme based on +/// per-dimension sparse/dense annotations. This data structure provides a +/// bufferized form of a sparse tensor type. In contrast to generating setup +/// methods for each differently annotated sparse tensor, this method provides +/// a convenient "one-size-fits-all" solution that simply takes an input tensor +/// and annotations to implement all required setup in a general manner. +template +class SparseTensorStorage final : public SparseTensorStorageBase { + /// Private constructor to share code between the other constructors. + /// Beware that the object is not necessarily guaranteed to be in a + /// valid state after this constructor alone; e.g., `isCompressedDim(d)` + /// doesn't entail `!(pointers[d].empty())`. + /// + /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. + SparseTensorStorage(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity) + : SparseTensorStorageBase(dimSizes, perm, sparsity), pointers(getRank()), + indices(getRank()), idx(getRank()) {} + +public: + /// Constructs a sparse tensor storage scheme with the given dimensions, + /// permutation, and per-dimension dense/sparse annotations, using + /// the coordinate scheme tensor for the initial contents if provided. + /// + /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. + SparseTensorStorage(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity, + SparseTensorCOO *coo) + : SparseTensorStorage(dimSizes, perm, sparsity) { + // Provide hints on capacity of pointers and indices. + // TODO: needs much fine-tuning based on actual sparsity; currently + // we reserve pointer/index space based on all previous dense + // dimensions, which works well up to first sparse dim; but + // we should really use nnz and dense/sparse distribution. 
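      // As a concrete illustration (added commentary only, no behavioral
      // change): for a 2-D CSR-like tensor annotated (dense, compressed)
      // with sizes {m, n}, the dense dim-0 sets sz = m, so the loop below
      // reserves m + 1 entries in pointers[1] and m entries in indices[1]
      // before resetting sz to 1.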
+ bool allDense = true; + uint64_t sz = 1; + for (uint64_t r = 0, rank = getRank(); r < rank; r++) { + if (isCompressedDim(r)) { + // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply + // `sz` by that before reserving. (For now we just use 1.) + pointers[r].reserve(sz + 1); + pointers[r].push_back(0); + indices[r].reserve(sz); + sz = 1; + allDense = false; + } else if (isSingletonDim(r)) { + indices[r].reserve(sz); + sz = 1; + allDense = false; + } else { // Dense dimension. + assert(isDenseDim(r)); + sz = detail::checkedMul(sz, getDimSizes()[r]); + } + } + // Then assign contents from coordinate scheme tensor if provided. + if (coo) { + // Ensure both preconditions of `fromCOO`. + assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch"); + coo->sort(); + // Now actually insert the `elements`. + const std::vector> &elements = coo->getElements(); + uint64_t nnz = elements.size(); + values.reserve(nnz); + fromCOO(elements, 0, nnz, 0); + } else if (allDense) { + values.resize(sz, 0); + } + } + + /// Constructs a sparse tensor storage scheme with the given dimensions, + /// permutation, and per-dimension dense/sparse annotations, using + /// the given sparse tensor for the initial contents. + /// + /// Preconditions: + /// * `perm` and `sparsity` must be valid for `dimSizes.size()`. + /// * The `tensor` must have the same value type `V`. + SparseTensorStorage(const std::vector &dimSizes, + const uint64_t *perm, const DimLevelType *sparsity, + const SparseTensorStorageBase &tensor); + + ~SparseTensorStorage() final = default; + + /// Partially specialize these getter methods based on template types. + void getPointers(std::vector
<P>
**out, uint64_t d) final { + assert(d < getRank()); + *out = &pointers[d]; + } + void getIndices(std::vector **out, uint64_t d) final { + assert(d < getRank()); + *out = &indices[d]; + } + void getValues(std::vector **out) final { *out = &values; } + + /// Partially specialize lexicographical insertions based on template types. + void lexInsert(const uint64_t *cursor, V val) final { + // First, wrap up pending insertion path. + uint64_t diff = 0; + uint64_t top = 0; + if (!values.empty()) { + diff = lexDiff(cursor); + endPath(diff + 1); + top = idx[diff] + 1; + } + // Then continue with insertion path. + insPath(cursor, diff, top, val); + } + + /// Partially specialize expanded insertions based on template types. + /// Note that this method resets the values/filled-switch array back + /// to all-zero/false while only iterating over the nonzero elements. + void expInsert(uint64_t *cursor, V *values, bool *filled, uint64_t *added, + uint64_t count) final { + if (count == 0) + return; + // Sort. + std::sort(added, added + count); + // Restore insertion path for first insert. + const uint64_t lastDim = getRank() - 1; + uint64_t index = added[0]; + cursor[lastDim] = index; + lexInsert(cursor, values[index]); + assert(filled[index]); + values[index] = 0; + filled[index] = false; + // Subsequent insertions are quick. + for (uint64_t i = 1; i < count; i++) { + assert(index < added[i] && "non-lexicographic insertion"); + index = added[i]; + cursor[lastDim] = index; + insPath(cursor, lastDim, added[i - 1] + 1, values[index]); + assert(filled[index]); + values[index] = 0; + filled[index] = false; + } + } + + /// Finalizes lexicographic insertions. + void endInsert() final { + if (values.empty()) + finalizeSegment(0); + else + endPath(0); + } + + void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t rank, + const uint64_t *perm) const final { + *out = new SparseTensorEnumerator(*this, rank, perm); + } + + /// Returns this sparse tensor storage scheme as a new memory-resident + /// sparse tensor in coordinate scheme with the given dimension order. + /// + /// Precondition: `perm` must be valid for `getRank()`. + SparseTensorCOO *toCOO(const uint64_t *perm) const { + SparseTensorEnumeratorBase *enumerator; + newEnumerator(&enumerator, getRank(), perm); + SparseTensorCOO *coo = + new SparseTensorCOO(enumerator->permutedSizes(), values.size()); + enumerator->forallElements([&coo](const std::vector &ind, V val) { + coo->add(ind, val); + }); + // TODO: This assertion assumes there are no stored zeros, + // or if there are then that we don't filter them out. + // Cf., + assert(coo->getElements().size() == values.size()); + delete enumerator; + return coo; + } + + /// Factory method. Constructs a sparse tensor storage scheme with the given + /// dimensions, permutation, and per-dimension dense/sparse annotations, + /// using the coordinate scheme tensor for the initial contents if provided. + /// In the latter case, the coordinate scheme must respect the same + /// permutation as is desired for the new sparse tensor storage. + /// + /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`. 
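  /// For illustration only (not part of this patch), a sketch of using the
  /// factory defined just below to build CSR-style storage (dense outer
  /// dimension, compressed inner dimension) of doubles with 64-bit overhead
  /// types from an existing `SparseTensorCOO<double> *coo`:
  ///
  ///   const uint64_t shape[2] = {0, 0}; // 0 = accept the COO's sizes
  ///   const uint64_t perm[2] = {0, 1};  // identity permutation
  ///   const DimLevelType sparsity[2] = {DimLevelType::kDense,
  ///                                     DimLevelType::kCompressed};
  ///   auto *csr = SparseTensorStorage<uint64_t, uint64_t, double>::
  ///       newSparseTensor(2, shape, perm, sparsity, coo);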
+ static SparseTensorStorage * + newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, + const DimLevelType *sparsity, SparseTensorCOO *coo) { + SparseTensorStorage *n = nullptr; + if (coo) { + const auto &coosz = coo->getDimSizes(); + detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape); + n = new SparseTensorStorage(coosz, perm, sparsity, coo); + } else { + std::vector permsz(rank); + for (uint64_t r = 0; r < rank; r++) { + assert(shape[r] > 0 && "Dimension size zero has trivial storage"); + permsz[perm[r]] = shape[r]; + } + // We pass the null `coo` to ensure we select the intended constructor. + n = new SparseTensorStorage(permsz, perm, sparsity, coo); + } + return n; + } + + /// Factory method. Constructs a sparse tensor storage scheme with + /// the given dimensions, permutation, and per-dimension dense/sparse + /// annotations, using the sparse tensor for the initial contents. + /// + /// Preconditions: + /// * `shape`, `perm`, and `sparsity` must be valid for `rank`. + /// * The `tensor` must have the same value type `V`. + static SparseTensorStorage * + newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, + const DimLevelType *sparsity, + const SparseTensorStorageBase *source) { + assert(source && "Got nullptr for source"); + SparseTensorEnumeratorBase *enumerator; + source->newEnumerator(&enumerator, rank, perm); + const auto &permsz = enumerator->permutedSizes(); + detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape); + auto *tensor = + new SparseTensorStorage(permsz, perm, sparsity, *source); + delete enumerator; + return tensor; + } + +private: + /// Appends an arbitrary new position to `pointers[d]`. This method + /// checks that `pos` is representable in the `P` type; however, it + /// does not check that `pos` is semantically valid (i.e., larger than + /// the previous position and smaller than `indices[d].capacity()`). + void appendPointer(uint64_t d, uint64_t pos, uint64_t count = 1) { + assert(isCompressedDim(d)); + assert(pos <= std::numeric_limits
<P>
::max() && + "Pointer value is too large for the P-type"); + pointers[d].insert(pointers[d].end(), count, static_cast
<P>
(pos)); + } + + /// Appends index `i` to dimension `d`, in the semantically general + /// sense. For non-dense dimensions, that means appending to the + /// `indices[d]` array, checking that `i` is representable in the `I` + /// type; however, we do not verify other semantic requirements (e.g., + /// that `i` is in bounds for `dimSizes[d]`, and not previously occurring + /// in the same segment). For dense dimensions, this method instead + /// appends the appropriate number of zeros to the `values` array, + /// where `full` is the number of "entries" already written to `values` + /// for this segment (aka one after the highest index previously appended). + void appendIndex(uint64_t d, uint64_t full, uint64_t i) { + if (isCompressedDim(d) || isSingletonDim(d)) { + assert(i <= std::numeric_limits::max() && + "Index value is too large for the I-type"); + indices[d].push_back(static_cast(i)); + } else { // Dense dimension. + assert(isDenseDim(d)); + assert(i >= full && "Index was already filled"); + if (i == full) + return; // Short-circuit, since it'll be a nop. + if (d + 1 == getRank()) + values.insert(values.end(), i - full, 0); + else + finalizeSegment(d + 1, 0, i - full); + } + } + + /// Writes the given coordinate to `indices[d][pos]`. This method + /// checks that `i` is representable in the `I` type; however, it + /// does not check that `i` is semantically valid (i.e., in bounds + /// for `dimSizes[d]` and not elsewhere occurring in the same segment). + void writeIndex(uint64_t d, uint64_t pos, uint64_t i) { + assert(isCompressedDim(d)); + // Subscript assignment to `std::vector` requires that the `pos`-th + // entry has been initialized; thus we must be sure to check `size()` + // here, instead of `capacity()` as would be ideal. + assert(pos < indices[d].size() && "Index position is out of bounds"); + assert(i <= std::numeric_limits::max() && + "Index value is too large for the I-type"); + indices[d][pos] = static_cast(i); + } + + /// Computes the assembled-size associated with the `d`-th dimension, + /// given the assembled-size associated with the `(d-1)`-th dimension. + /// "Assembled-sizes" correspond to the (nominal) sizes of overhead + /// storage, as opposed to "dimension-sizes" which are the cardinality + /// of coordinates for that dimension. + /// + /// Precondition: the `pointers[d]` array must be fully initialized + /// before calling this method. + uint64_t assembledSize(uint64_t parentSz, uint64_t d) const { + if (isCompressedDim(d)) + return pointers[d][parentSz]; + // else if dense: + return parentSz * getDimSizes()[d]; + } + + /// Initializes sparse tensor storage scheme from a memory-resident sparse + /// tensor in coordinate scheme. This method prepares the pointers and + /// indices arrays under the given per-dimension dense/sparse annotations. + /// + /// Preconditions: + /// (1) the `elements` must be lexicographically sorted. + /// (2) the indices of every element are valid for `dimSizes` (equal rank + /// and pointwise less-than). + void fromCOO(const std::vector> &elements, uint64_t lo, + uint64_t hi, uint64_t d) { + uint64_t rank = getRank(); + assert(d <= rank && hi <= elements.size()); + // Once dimensions are exhausted, insert the numerical values. + if (d == rank) { + assert(lo < hi); + values.push_back(elements[lo].value); + return; + } + // Visit all elements in this interval. + uint64_t full = 0; + while (lo < hi) { // If `hi` is unchanged, then `lo < elements.size()`. + // Find segment in interval with same index elements in this dimension. 
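      // (Added commentary only: e.g., for a unique dimension at d == 0 with
      // sorted elements {(0,1), (0,2), (1,0)}, the first segment found below
      // is [lo, lo+2), since the two leading elements share index 0.)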
+ uint64_t i = elements[lo].indices[d]; + uint64_t seg = lo + 1; + bool merge = isUniqueDim(d); + while (merge && seg < hi && elements[seg].indices[d] == i) + seg++; + // Handle segment in interval for sparse or dense dimension. + appendIndex(d, full, i); + full = i + 1; + fromCOO(elements, lo, seg, d + 1); + // And move on to next segment in interval. + lo = seg; + } + // Finalize the sparse pointer structure at this dimension. + finalizeSegment(d, full); + } + + /// Finalize the sparse pointer structure at this dimension. + void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) { + if (count == 0) + return; // Short-circuit, since it'll be a nop. + if (isCompressedDim(d)) { + appendPointer(d, indices[d].size(), count); + } else if (isSingletonDim(d)) { + return; + } else { // Dense dimension. + assert(isDenseDim(d)); + const uint64_t sz = getDimSizes()[d]; + assert(sz >= full && "Segment is overfull"); + count = detail::checkedMul(count, sz - full); + // For dense storage we must enumerate all the remaining coordinates + // in this dimension (i.e., coordinates after the last non-zero + // element), and either fill in their zero values or else recurse + // to finalize some deeper dimension. + if (d + 1 == getRank()) + values.insert(values.end(), count, 0); + else + finalizeSegment(d + 1, 0, count); + } + } + + /// Wraps up a single insertion path, inner to outer. + void endPath(uint64_t diff) { + uint64_t rank = getRank(); + assert(diff <= rank); + for (uint64_t i = 0; i < rank - diff; i++) { + const uint64_t d = rank - i - 1; + finalizeSegment(d, idx[d] + 1); + } + } + + /// Continues a single insertion path, outer to inner. + void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) { + uint64_t rank = getRank(); + assert(diff < rank); + for (uint64_t d = diff; d < rank; d++) { + uint64_t i = cursor[d]; + appendIndex(d, top, i); + top = 0; + idx[d] = i; + } + values.push_back(val); + } + + /// Finds the lexicographic differing dimension. + uint64_t lexDiff(const uint64_t *cursor) const { + for (uint64_t r = 0, rank = getRank(); r < rank; r++) + if (cursor[r] > idx[r]) + return r; + else + assert(cursor[r] == idx[r] && "non-lexicographic insertion"); + assert(0 && "duplication insertion"); + return -1u; + } + + // Allow `SparseTensorEnumerator` to access the data-members (to avoid + // the cost of virtual-function dispatch in inner loops), without + // making them public to other client code. + friend class SparseTensorEnumerator; + + std::vector> pointers; + std::vector> indices; + std::vector values; + std::vector idx; // index cursor for lexicographic insertion. +}; + +/// A (higher-order) function object for enumerating the elements of some +/// `SparseTensorStorage` under a permutation. That is, the `forallElements` +/// method encapsulates the loop-nest for enumerating the elements of +/// the source tensor (in whatever order is best for the source tensor), +/// and applies a permutation to the coordinates/indices before handing +/// each element to the callback. A single enumerator object can be +/// freely reused for several calls to `forallElements`, just so long +/// as each call is sequential with respect to one another. +/// +/// N.B., this class stores a reference to the `SparseTensorStorageBase` +/// passed to the constructor; thus, objects of this class must not +/// outlive the sparse tensor they depend on. 
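/// For illustration only (not part of this patch), typical use mirrors the
/// `toCOO` method above: obtain an enumerator from a storage object and
/// visit every element under the chosen permutation:
///
///   SparseTensorEnumeratorBase<double> *enumerator;
///   tensor.newEnumerator(&enumerator, rank, perm);
///   enumerator->forallElements(
///       [](const std::vector<uint64_t> &ind, double val) {
///         // consume (ind, val); copy `ind` if it must outlive the call
///       });
///   delete enumerator;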
+/// +/// Design Note: The reason we define this class instead of simply using +/// `SparseTensorEnumerator` is because we need to hide/generalize +/// the `` template parameters from MLIR client code (to simplify the +/// type parameters used for direct sparse-to-sparse conversion). And the +/// reason we define the `SparseTensorEnumerator` subclasses rather +/// than simply using this class, is to avoid the cost of virtual-method +/// dispatch within the loop-nest. +template +class SparseTensorEnumeratorBase { +public: + /// Constructs an enumerator with the given permutation for mapping + /// the semantic-ordering of dimensions to the desired target-ordering. + /// + /// Preconditions: + /// * the `tensor` must have the same `V` value type. + /// * `perm` must be valid for `rank`. + SparseTensorEnumeratorBase(const SparseTensorStorageBase &tensor, + uint64_t rank, const uint64_t *perm) + : src(tensor), permsz(src.getRev().size()), reord(getRank()), + cursor(getRank()) { + assert(perm && "Received nullptr for permutation"); + assert(rank == getRank() && "Permutation rank mismatch"); + const auto &rev = src.getRev(); // source-order -> semantic-order + const auto &dimSizes = src.getDimSizes(); // in source storage-order + for (uint64_t s = 0; s < rank; s++) { // `s` source storage-order + uint64_t t = perm[rev[s]]; // `t` target-order + reord[s] = t; + permsz[t] = dimSizes[s]; + } + } + + virtual ~SparseTensorEnumeratorBase() = default; + + // We disallow copying to help avoid leaking the `src` reference. + // (In addition to avoiding the problem of slicing.) + SparseTensorEnumeratorBase(const SparseTensorEnumeratorBase &) = delete; + SparseTensorEnumeratorBase & + operator=(const SparseTensorEnumeratorBase &) = delete; + + /// Returns the source/target tensor's rank. (The source-rank and + /// target-rank are always equal since we only support permutations. + /// Though once we add support for other dimension mappings, this + /// method will have to be split in two.) + uint64_t getRank() const { return permsz.size(); } + + /// Returns the target tensor's dimension sizes. + const std::vector &permutedSizes() const { return permsz; } + + /// Enumerates all elements of the source tensor, permutes their + /// indices, and passes the permuted element to the callback. + /// The callback must not store the cursor reference directly, + /// since this function reuses the storage. Instead, the callback + /// must copy it if they want to keep it. + virtual void forallElements(ElementConsumer yield) = 0; + +protected: + const SparseTensorStorageBase &src; + std::vector permsz; // in target order. + std::vector reord; // source storage-order -> target order. + std::vector cursor; // in target order. +}; + +template +class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { + using Base = SparseTensorEnumeratorBase; + +public: + /// Constructs an enumerator with the given permutation for mapping + /// the semantic-ordering of dimensions to the desired target-ordering. + /// + /// Precondition: `perm` must be valid for `rank`. + SparseTensorEnumerator(const SparseTensorStorage &tensor, + uint64_t rank, const uint64_t *perm) + : Base(tensor, rank, perm) {} + + ~SparseTensorEnumerator() final = default; + + void forallElements(ElementConsumer yield) final { + forallElements(yield, 0, 0); + } + +private: + /// The recursive component of the public `forallElements`. + void forallElements(ElementConsumer yield, uint64_t parentPos, + uint64_t d) { + // Recover the `` type parameters of `src`. 
+ const auto &src = + static_cast &>(this->src); + if (d == Base::getRank()) { + assert(parentPos < src.values.size() && + "Value position is out of bounds"); + // TODO: + yield(this->cursor, src.values[parentPos]); + } else if (src.isCompressedDim(d)) { + // Look up the bounds of the `d`-level segment determined by the + // `d-1`-level position `parentPos`. + const std::vector
<P>
&pointersD = src.pointers[d]; + assert(parentPos + 1 < pointersD.size() && + "Parent pointer position is out of bounds"); + const uint64_t pstart = static_cast(pointersD[parentPos]); + const uint64_t pstop = static_cast(pointersD[parentPos + 1]); + // Loop-invariant code for looking up the `d`-level coordinates/indices. + const std::vector &indicesD = src.indices[d]; + assert(pstop <= indicesD.size() && "Index position is out of bounds"); + uint64_t &cursorReordD = this->cursor[this->reord[d]]; + for (uint64_t pos = pstart; pos < pstop; pos++) { + cursorReordD = static_cast(indicesD[pos]); + forallElements(yield, pos, d + 1); + } + } else if (src.isSingletonDim(d)) { + MLIR_SPARSETENSOR_FATAL("unsupported dimension level type"); + } else { // Dense dimension. + assert(src.isDenseDim(d)); + const uint64_t sz = src.getDimSizes()[d]; + const uint64_t pstart = parentPos * sz; + uint64_t &cursorReordD = this->cursor[this->reord[d]]; + for (uint64_t i = 0; i < sz; i++) { + cursorReordD = i; + forallElements(yield, pstart + i, d + 1); + } + } + } +}; + +/// Statistics regarding the number of nonzero subtensors in +/// a source tensor, for direct sparse=>sparse conversion a la +/// . +/// +/// N.B., this class stores references to the parameters passed to +/// the constructor; thus, objects of this class must not outlive +/// those parameters. +class SparseTensorNNZ final { +public: + /// Allocate the statistics structure for the desired sizes and + /// sparsity (in the target tensor's storage-order). This constructor + /// does not actually populate the statistics, however; for that see + /// `initialize`. + /// + /// Precondition: `dimSizes` must not contain zeros. + SparseTensorNNZ(const std::vector &dimSizes, + const std::vector &sparsity); + + // We disallow copying to help avoid leaking the stored references. + SparseTensorNNZ(const SparseTensorNNZ &) = delete; + SparseTensorNNZ &operator=(const SparseTensorNNZ &) = delete; + + /// Returns the rank of the target tensor. + uint64_t getRank() const { return dimSizes.size(); } + + /// Enumerate the source tensor to fill in the statistics. The + /// enumerator should already incorporate the permutation (from + /// semantic-order to the target storage-order). + template + void initialize(SparseTensorEnumeratorBase &enumerator) { + assert(enumerator.getRank() == getRank() && "Tensor rank mismatch"); + assert(enumerator.permutedSizes() == dimSizes && "Tensor size mismatch"); + enumerator.forallElements( + [this](const std::vector &ind, V) { add(ind); }); + } + + /// The type of callback functions which receive an nnz-statistic. + using NNZConsumer = const std::function &; + + /// Lexicographically enumerates all indicies for dimensions strictly + /// less than `stopDim`, and passes their nnz statistic to the callback. + /// Since our use-case only requires the statistic not the coordinates + /// themselves, we do not bother to construct those coordinates. + void forallIndices(uint64_t stopDim, NNZConsumer yield) const; + +private: + /// Adds a new element (i.e., increment its statistics). We use + /// a method rather than inlining into the lambda in `initialize`, + /// to avoid spurious templating over `V`. And this method is private + /// to avoid needing to re-assert validity of `ind` (which is guaranteed + /// by `forallElements`). + void add(const std::vector &ind); + + /// Recursive component of the public `forallIndices`. 
+ void forallIndices(NNZConsumer yield, uint64_t stopDim, uint64_t parentPos, + uint64_t d) const; + + // All of these are in the target storage-order. + const std::vector &dimSizes; + const std::vector &dimTypes; + std::vector> nnz; +}; + +template +SparseTensorStorage::SparseTensorStorage( + const std::vector &dimSizes, const uint64_t *perm, + const DimLevelType *sparsity, const SparseTensorStorageBase &tensor) + : SparseTensorStorage(dimSizes, perm, sparsity) { + SparseTensorEnumeratorBase *enumerator; + tensor.newEnumerator(&enumerator, getRank(), perm); + { + // Initialize the statistics structure. + SparseTensorNNZ nnz(getDimSizes(), getDimTypes()); + nnz.initialize(*enumerator); + // Initialize "pointers" overhead (and allocate "indices", "values"). + uint64_t parentSz = 1; // assembled-size (not dimension-size) of `r-1`. + for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + if (isCompressedDim(r)) { + pointers[r].reserve(parentSz + 1); + pointers[r].push_back(0); + uint64_t currentPos = 0; + nnz.forallIndices(r, [this, ¤tPos, r](uint64_t n) { + currentPos += n; + appendPointer(r, currentPos); + }); + assert(pointers[r].size() == parentSz + 1 && + "Final pointers size doesn't match allocated size"); + // That assertion entails `assembledSize(parentSz, r)` + // is now in a valid state. That is, `pointers[r][parentSz]` + // equals the present value of `currentPos`, which is the + // correct assembled-size for `indices[r]`. + } + // Update assembled-size for the next iteration. + parentSz = assembledSize(parentSz, r); + // Ideally we need only `indices[r].reserve(parentSz)`, however + // the `std::vector` implementation forces us to initialize it too. + // That is, in the yieldPos loop we need random-access assignment + // to `indices[r]`; however, `std::vector`'s subscript-assignment + // only allows assigning to already-initialized positions. + if (isCompressedDim(r)) + indices[r].resize(parentSz, 0); + } + values.resize(parentSz, 0); // Both allocate and zero-initialize. + } + // The yieldPos loop + enumerator->forallElements([this](const std::vector &ind, V val) { + uint64_t parentSz = 1, parentPos = 0; + for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + if (isCompressedDim(r)) { + // If `parentPos == parentSz` then it's valid as an array-lookup; + // however, it's semantically invalid here since that entry + // does not represent a segment of `indices[r]`. Moreover, that + // entry must be immutable for `assembledSize` to remain valid. + assert(parentPos < parentSz && "Pointers position is out of bounds"); + const uint64_t currentPos = pointers[r][parentPos]; + // This increment won't overflow the `P` type, since it can't + // exceed the original value of `pointers[r][parentPos+1]` + // which was already verified to be within bounds for `P` + // when it was written to the array. + pointers[r][parentPos]++; + writeIndex(r, currentPos, ind[r]); + parentPos = currentPos; + } else if (isSingletonDim(r)) { + // the new parentPos equals the old parentPos. + } else { // Dense dimension. + assert(isDenseDim(r)); + parentPos = parentPos * getDimSizes()[r] + ind[r]; + } + parentSz = assembledSize(parentSz, r); + } + assert(parentPos < values.size() && "Value position is out of bounds"); + values[parentPos] = val; + }); + // No longer need the enumerator, so we'll delete it ASAP. 
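To make the pointer bookkeeping concrete: the yieldPos loop above and the finalizeYieldPos loop below follow the classic counting-sort idiom for assembling compressed overhead storage. A minimal standalone sketch of that idiom for one compressed dimension follows; every name in it is invented for illustration and none of it is part of the patch.

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

int main() {
  // Lexicographically sorted COO elements of a 3x4 matrix: (row, col).
  std::vector<std::pair<uint64_t, uint64_t>> elems = {{0, 1}, {0, 3}, {2, 0}};
  const uint64_t rows = 3;
  // Pre-pass (standing in for the SparseTensorNNZ statistics): cumulative
  // per-row counts, giving pointers = {0, 2, 2, 3}.
  std::vector<uint64_t> pointers(rows + 1, 0);
  for (const auto &e : elems)
    pointers[e.first + 1]++;
  for (uint64_t r = 0; r < rows; r++)
    pointers[r + 1] += pointers[r];
  // "yieldPos" pass: scatter column indices; pointers[r] is bumped so that
  // it always names the next free slot of row r.
  std::vector<uint64_t> indices(elems.size());
  for (const auto &e : elems)
    indices[pointers[e.first]++] = e.second;
  // "finalizeYieldPos" pass: shift right by one and restore pointers[0],
  // recovering pointers = {0, 2, 2, 3} alongside indices = {1, 3, 0}.
  for (uint64_t r = rows; r > 0; r--)
    pointers[r] = pointers[r - 1];
  pointers[0] = 0;
  assert(pointers[1] == 2 && indices[0] == 1 && indices[2] == 0);
  return 0;
}

The constructor here generalizes this over all dimensions and overhead types, but the per-compressed-level arithmetic is the same scatter-then-shift scheme.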
+ delete enumerator; + // The finalizeYieldPos loop + for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; r++) { + if (isCompressedDim(r)) { + assert(parentSz == pointers[r].size() - 1 && + "Actual pointers size doesn't match the expected size"); + // Can't check all of them, but at least we can check the last one. + assert(pointers[r][parentSz - 1] == pointers[r][parentSz] && + "Pointers got corrupted"); + // TODO: optimize this by using `memmove` or similar. + for (uint64_t n = 0; n < parentSz; n++) { + const uint64_t parentPos = parentSz - n; + pointers[r][parentPos] = pointers[r][parentPos - 1]; + } + pointers[r][0] = 0; + } + parentSz = assembledSize(parentSz, r); + } +} + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h @@ -1,4 +1,4 @@ -//===- SparseTensorUtils.h - Enums shared with the runtime ------*- C++ -*-===// +//===- SparseTensorUtils.h - SparseTensor runtime support lib ---*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,126 +7,25 @@ //===----------------------------------------------------------------------===// // // This header file provides the enums and functions which comprise the -// public API of `ExecutionEngine/SparseUtils.cpp`. +// public API of the `ExecutionEngine/SparseTensorUtils.cpp` runtime +// support library for the SparseTensor dialect. // //===----------------------------------------------------------------------===// -#ifndef MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_ -#define MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_ +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H +#define MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H #include "mlir/ExecutionEngine/CRunnerUtils.h" -#include "mlir/ExecutionEngine/Float16bits.h" +#include "mlir/ExecutionEngine/SparseTensor/Enums.h" #include #include #include +using namespace mlir::sparse_tensor; + extern "C" { -//===----------------------------------------------------------------------===// -// -// Typedefs and enums. These are required to be public so that they -// can be shared with `Transforms/SparseTensorConversion.cpp`, since -// they define the arguments to the public functions declared later on. -// -// This section also defines x-macros -// so that we can generate variations of the public functions for each -// supported primary- and/or overhead-type. -// -//===----------------------------------------------------------------------===// - -/// This type is used in the public API at all places where MLIR expects -/// values with the built-in type "index". For now, we simply assume that -/// type is 64-bit, but targets with different "index" bit widths should -/// link with an alternatively built runtime support library. -// TODO: support such targets? -using index_type = uint64_t; - -/// Encoding of overhead types (both pointer overhead and indices -/// overhead), for "overloading" @newSparseTensor. -enum class OverheadType : uint32_t { - kIndex = 0, - kU64 = 1, - kU32 = 2, - kU16 = 3, - kU8 = 4 -}; - -// This x-macro calls its argument on every overhead type which has -// fixed-width. 
It excludes `index_type` because that type is often -// handled specially (e.g., by translating it into the architecture-dependent -// equivalent fixed-width overhead type). -#define FOREVERY_FIXED_O(DO) \ - DO(64, uint64_t) \ - DO(32, uint32_t) \ - DO(16, uint16_t) \ - DO(8, uint8_t) - -// This x-macro calls its argument on every overhead type, including -// `index_type`. -#define FOREVERY_O(DO) \ - FOREVERY_FIXED_O(DO) \ - DO(0, index_type) - -// These are not just shorthands but indicate the particular -// implementation used (e.g., as opposed to C99's `complex double`, -// or MLIR's `ComplexType`). -using complex64 = std::complex; -using complex32 = std::complex; - -/// Encoding of the elemental type, for "overloading" @newSparseTensor. -enum class PrimaryType : uint32_t { - kF64 = 1, - kF32 = 2, - kF16 = 3, - kBF16 = 4, - kI64 = 5, - kI32 = 6, - kI16 = 7, - kI8 = 8, - kC64 = 9, - kC32 = 10 -}; - -// This x-macro includes all `V` types. -#define FOREVERY_V(DO) \ - DO(F64, double) \ - DO(F32, float) \ - DO(F16, f16) \ - DO(BF16, bf16) \ - DO(I64, int64_t) \ - DO(I32, int32_t) \ - DO(I16, int16_t) \ - DO(I8, int8_t) \ - DO(C64, complex64) \ - DO(C32, complex32) - -/// The actions performed by @newSparseTensor. -enum class Action : uint32_t { - kEmpty = 0, - kFromFile = 1, - kFromCOO = 2, - kSparseToSparse = 3, - kEmptyCOO = 4, - kToCOO = 5, - kToIterator = 6, -}; - -/// This enum mimics `SparseTensorEncodingAttr::DimLevelType` for -/// breaking dependency cycles. `SparseTensorEncodingAttr::DimLevelType` -/// is the source of truth and this enum should be kept consistent with it. -enum class DimLevelType : uint8_t { - kDense = 0, - kCompressed = 1, - kCompressedNu = 2, - kCompressedNo = 3, - kCompressedNuNo = 4, - kSingleton = 5, - kSingletonNu = 6, - kSingletonNo = 7, - kSingletonNuNo = 8, -}; - //===----------------------------------------------------------------------===// // // Public functions which operate on MLIR buffers (memrefs) to interact @@ -243,7 +142,7 @@ #undef DECL_OUTSPARSETENSOR /// Releases the memory for the tensor-storage object. -void delSparseTensor(void *tensor); +MLIR_CRUNNERUTILS_EXPORT void delSparseTensor(void *tensor); /// Releases the memory for the coordinate-scheme object. 
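The FOREVERY_* x-macros removed from this header (and consumers such as DECL_DELCOO just below) follow a common idiom that is easy to misread: the caller supplies a DO(NAME, TYPE) macro and the list applies it to every entry, generating one definition per supported type. A tiny self-contained illustration with made-up names, not the library's actual macros:

#include <cstdint>
#include <cstdio>

// The "list" macro: applies DO to each supported (NAME, TYPE) pair.
#define FOREVERY_DEMO_TYPE(DO)                                                 \
  DO(I32, int32_t)                                                             \
  DO(I64, int64_t)

// A client macro that generates one function per entry.
#define DEMO_PRINT_SIZE(NAME, T)                                               \
  void printSize##NAME() { std::printf(#NAME ": %zu bytes\n", sizeof(T)); }
FOREVERY_DEMO_TYPE(DEMO_PRINT_SIZE) // Defines printSizeI32() and printSizeI64().
#undef DEMO_PRINT_SIZE

int main() {
  printSizeI32();
  printSizeI64();
  return 0;
}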
#define DECL_DELCOO(VNAME, V) \ @@ -308,4 +207,4 @@ } // extern "C" -#endif // MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_ +#endif // MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -16,7 +16,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" -#include "mlir/ExecutionEngine/SparseTensorUtils.h" +#include "mlir/ExecutionEngine/SparseTensor/Enums.h" #include "mlir/IR/Builders.h" namespace mlir { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -28,7 +28,7 @@ #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/ExecutionEngine/SparseTensorUtils.h" +#include "mlir/ExecutionEngine/SparseTensor/Enums.h" #include "mlir/Transforms/DialectConversion.h" using namespace mlir; diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -118,13 +118,26 @@ MLIRSupport ) +add_mlir_library(mlir_float16_utils + SHARED + Float16bits.cpp + + EXCLUDE_FROM_LIBMLIR + ) +set_property(TARGET mlir_float16_utils PROPERTY CXX_STANDARD 17) +target_compile_definitions(mlir_float16_utils PRIVATE mlir_float16_utils_EXPORTS) + +add_subdirectory(SparseTensor) + add_mlir_library(mlir_c_runner_utils SHARED CRunnerUtils.cpp - Float16bits.cpp SparseTensorUtils.cpp EXCLUDE_FROM_LIBMLIR + + LINK_LIBS PUBLIC + mlir_sparse_tensor_utils ) set_property(TARGET mlir_c_runner_utils PROPERTY CXX_STANDARD 17) target_compile_definitions(mlir_c_runner_utils PRIVATE mlir_c_runner_utils_EXPORTS) diff --git a/mlir/lib/ExecutionEngine/Float16bits.cpp b/mlir/lib/ExecutionEngine/Float16bits.cpp --- a/mlir/lib/ExecutionEngine/Float16bits.cpp +++ b/mlir/lib/ExecutionEngine/Float16bits.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "mlir/ExecutionEngine/Float16bits.h" + +#ifdef MLIR_FLOAT16_DEFINE_FUNCTIONS // We are building this library + #include #include @@ -188,3 +191,5 @@ // cases. return __truncsfbf2(static_cast(d)); } + +#endif // MLIR_FLOAT16_DEFINE_FUNCTIONS diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -0,0 +1,23 @@ +# Unlike mlir_float16_utils, mlir_c_runner_utils, etc, we do *not* make +# this a shared library: because doing so causes issues building on Windows. 
+add_mlir_library(mlir_sparse_tensor_utils + File.cpp + NNZ.cpp + Storage.cpp + + EXCLUDE_FROM_LIBMLIR + + LINK_LIBS PUBLIC + mlir_float16_utils + ) +set_property(TARGET mlir_sparse_tensor_utils PROPERTY CXX_STANDARD 17) +target_compile_definitions(mlir_sparse_tensor_utils PRIVATE mlir_sparse_tensor_utils_EXPORTS) + +# To make sure we adhere to the style guide: +# +check_cxx_compiler_flag(-Wweak-vtables + COMPILER_SUPPORTS_WARNING_WEAK_VTABLES) +if(COMPILER_SUPPORTS_WARNING_WEAK_VTABLES) + target_compile_options(mlir_sparse_tensor_utils PUBLIC + "-Wweak-vtables") +endif() diff --git a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp @@ -0,0 +1,163 @@ +//===- File.cpp - Parsing sparse tensors from files -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements parsing and printing of files in one of the +// following external formats: +// +// (1) Matrix Market Exchange (MME): *.mtx +// https://math.nist.gov/MatrixMarket/formats.html +// +// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns +// http://frostt.io/tensors/file-formats.html +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/SparseTensor/File.h" + +#ifdef MLIR_SPARSETENSOR_DEFINE_FUNCTIONS // We are building this library + +#include +#include + +using namespace mlir::sparse_tensor; + +/// Opens the file for reading. +void SparseTensorFile::openFile() { + if (file) + MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename); + file = fopen(filename, "r"); + if (!file) + MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename); +} + +/// Closes the file. +void SparseTensorFile::closeFile() { + if (file) { + fclose(file); + file = nullptr; + } +} + +// TODO(wrengr/bixia): figure out how to reorganize the element-parsing +// loop of `openSparseTensorCOO` into methods of this class, so we can +// avoid leaking access to the `line` pointer (both for general hygiene +// and because we can't mark it const due to the second argument of +// `strtoul`/`strtoud` being `char * *restrict` rather than +// `char const* *restrict`). +// +/// Attempts to read a line from the file. +char *SparseTensorFile::readLine() { + if (fgets(line, kColWidth, file)) + return line; + MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); +} + +/// Reads and parses the file's header. +void SparseTensorFile::readHeader() { + assert(file && "Attempt to readHeader() before openFile()"); + if (strstr(filename, ".mtx")) + readMMEHeader(); + else if (strstr(filename, ".tns")) + readExtFROSTTHeader(); + else + MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename); + assert(isValid() && "Failed to read the header"); +} + +/// Asserts the shape subsumes the actual dimension sizes. 
Is only +/// valid after parsing the header. +void SparseTensorFile::assertMatchesShape(uint64_t rank, + const uint64_t *shape) const { + assert(rank == getRank() && "Rank mismatch"); + for (uint64_t r = 0; r < rank; r++) + assert((shape[r] == 0 || shape[r] == idata[2 + r]) && + "Dimension size mismatch"); +} + +/// Helper to convert string to lower case. +static inline char *toLower(char *token) { + for (char *c = token; *c; ++c) + *c = tolower(*c); + return token; +} + +/// Read the MME header of a general sparse matrix of type real. +void SparseTensorFile::readMMEHeader() { + char header[64]; + char object[64]; + char format[64]; + char field[64]; + char symmetry[64]; + // Read header line. + if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, + symmetry) != 5) + MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename); + // Process `field`, which specify pattern or the data type of the values. + if (strcmp(toLower(field), "pattern") == 0) + valueKind_ = ValueKind::kPattern; + else if (strcmp(toLower(field), "real") == 0) + valueKind_ = ValueKind::kReal; + else if (strcmp(toLower(field), "integer") == 0) + valueKind_ = ValueKind::kInteger; + else if (strcmp(toLower(field), "complex") == 0) + valueKind_ = ValueKind::kComplex; + else + MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename); + + // Set properties. + isSymmetric_ = (strcmp(toLower(symmetry), "symmetric") == 0); + // Make sure this is a general sparse matrix. + if (strcmp(toLower(header), "%%matrixmarket") || + strcmp(toLower(object), "matrix") || + strcmp(toLower(format), "coordinate") || + (strcmp(toLower(symmetry), "general") && !isSymmetric_)) + MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n", + filename); + // Skip comments. + while (true) { + readLine(); + if (line[0] != '%') + break; + } + // Next line contains M N NNZ. + idata[0] = 2; // rank + if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, + idata + 1) != 3) + MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename); +} + +/// Read the "extended" FROSTT header. Although not part of the documented +/// format, we assume that the file starts with optional comments followed +/// by two lines that define the rank, the number of nonzeros, and the +/// dimensions sizes (one per rank) of the sparse tensor. +void SparseTensorFile::readExtFROSTTHeader() { + // Skip comments. + while (true) { + readLine(); + if (line[0] != '#') + break; + } + // Next line contains RANK and NNZ. + if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) + MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename); + // Followed by a line with the dimension sizes (one per rank). + for (uint64_t r = 0; r < idata[0]; r++) + if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) + MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename); + readLine(); // end of line + // The FROSTT format does not define the data type of the nonzero elements. + valueKind_ = ValueKind::kUndefined; +} + +#endif // MLIR_SPARSETENSOR_DEFINE_FUNCTIONS diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp @@ -0,0 +1,88 @@ +//===- NNZ.cpp - NNZ-statistics for direct sparse2sparse conversion -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains method definitions for `SparseTensorNNZ`. +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/SparseTensor/Storage.h" + +#ifdef MLIR_SPARSETENSOR_DEFINE_FUNCTIONS // We are building this library + +using namespace mlir::sparse_tensor; + +SparseTensorNNZ::SparseTensorNNZ(const std::vector &dimSizes, + const std::vector &sparsity) + : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) { + assert(dimSizes.size() == dimTypes.size() && "Rank mismatch"); + bool uncompressed = true; + (void)uncompressed; + uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`. + for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + switch (dimTypes[r]) { + case DimLevelType::kCompressed: + assert(uncompressed && + "Multiple compressed layers not currently supported"); + uncompressed = false; + nnz[r].resize(sz, 0); // Both allocate and zero-initialize. + break; + case DimLevelType::kDense: + assert(uncompressed && "Dense after compressed not currently supported"); + break; + case DimLevelType::kSingleton: + // Singleton after Compressed causes no problems for allocating + // `nnz` nor for the yieldPos loop. This remains true even + // when adding support for multiple compressed dimensions or + // for dense-after-compressed. + break; + default: + MLIR_SPARSETENSOR_FATAL("unsupported dimension level type"); + } + sz = detail::checkedMul(sz, dimSizes[r]); + } +} + +void SparseTensorNNZ::forallIndices(uint64_t stopDim, + SparseTensorNNZ::NNZConsumer yield) const { + assert(stopDim < getRank() && "Stopping-dimension is out of bounds"); + assert(dimTypes[stopDim] == DimLevelType::kCompressed && + "Cannot look up non-compressed dimensions"); + forallIndices(yield, stopDim, 0, 0); +} + +void SparseTensorNNZ::add(const std::vector &ind) { + uint64_t parentPos = 0; + for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + if (dimTypes[r] == DimLevelType::kCompressed) + nnz[r][parentPos]++; + parentPos = parentPos * dimSizes[r] + ind[r]; + } +} + +void SparseTensorNNZ::forallIndices(SparseTensorNNZ::NNZConsumer yield, + uint64_t stopDim, uint64_t parentPos, + uint64_t d) const { + assert(d <= stopDim); + if (d == stopDim) { + assert(parentPos < nnz[d].size() && "Cursor is out of range"); + yield(nnz[d][parentPos]); + } else { + const uint64_t sz = dimSizes[d]; + const uint64_t pstart = parentPos * sz; + for (uint64_t i = 0; i < sz; i++) + forallIndices(yield, stopDim, pstart + i, d + 1); + } +} + +#endif // MLIR_SPARSETENSOR_DEFINE_FUNCTIONS diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp @@ -0,0 +1,100 @@ +//===- StorageBase.cpp - TACO-flavored sparse tensor representation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains method definitions for `SparseTensorStorageBase`. +// In particular we want to ensure that the default implementations of +// the "partial method specialization" trick aren't inline (since there's +// no benefit). Though this also helps ensure that we avoid weak-vtables: +// +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/SparseTensor/Storage.h" + +#ifdef MLIR_SPARSETENSOR_DEFINE_FUNCTIONS // We are building this library + +using namespace mlir::sparse_tensor; + +SparseTensorStorageBase::SparseTensorStorageBase( + const std::vector &dimSizes, const uint64_t *perm, + const DimLevelType *sparsity) + : dimSizes(dimSizes), rev(getRank()), + dimTypes(sparsity, sparsity + getRank()) { + assert(perm && sparsity); + const uint64_t rank = getRank(); + // Validate parameters. + assert(rank > 0 && "Trivial shape is unsupported"); + for (uint64_t r = 0; r < rank; r++) { + assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); + assert((isDenseDim(r) || isCompressedDim(r) || isSingletonDim(r)) && + "Unsupported DimLevelType"); + } + // Construct the "reverse" (i.e., inverse) permutation. + for (uint64_t r = 0; r < rank; r++) + rev[perm[r]] = r; +} + +// Helper macro for generating error messages when some +// `SparseTensorStorage` is cast to `SparseTensorStorageBase` +// and then the wrong "partial method specialization" is called. +#define FATAL_PIV(NAME) \ + MLIR_SPARSETENSOR_FATAL(" type mismatch for: " #NAME); + +#define IMPL_NEWENUMERATOR(VNAME, V) \ + void SparseTensorStorageBase::newEnumerator( \ + SparseTensorEnumeratorBase **, uint64_t, const uint64_t *) const { \ + FATAL_PIV("newEnumerator" #VNAME); \ + } +FOREVERY_V(IMPL_NEWENUMERATOR) +#undef IMPL_NEWENUMERATOR + +#define IMPL_GETPOINTERS(PNAME, P) \ + void SparseTensorStorageBase::getPointers(std::vector
<P>
**, uint64_t) { \ + FATAL_PIV("getPointers" #PNAME); \ + } +FOREVERY_FIXED_O(IMPL_GETPOINTERS) +#undef IMPL_GETPOINTERS + +#define IMPL_GETINDICES(INAME, I) \ + void SparseTensorStorageBase::getIndices(std::vector **, uint64_t) { \ + FATAL_PIV("getIndices" #INAME); \ + } +FOREVERY_FIXED_O(IMPL_GETINDICES) +#undef IMPL_GETINDICES + +#define IMPL_GETVALUES(VNAME, V) \ + void SparseTensorStorageBase::getValues(std::vector **) { \ + FATAL_PIV("getValues" #VNAME); \ + } +FOREVERY_V(IMPL_GETVALUES) +#undef IMPL_GETVALUES + +#define IMPL_LEXINSERT(VNAME, V) \ + void SparseTensorStorageBase::lexInsert(const uint64_t *, V) { \ + FATAL_PIV("lexInsert" #VNAME); \ + } +FOREVERY_V(IMPL_LEXINSERT) +#undef IMPL_LEXINSERT + +#define IMPL_EXPINSERT(VNAME, V) \ + void SparseTensorStorageBase::expInsert(uint64_t *, V *, bool *, uint64_t *, \ + uint64_t) { \ + FATAL_PIV("expInsert" #VNAME); \ + } +FOREVERY_V(IMPL_EXPINSERT) +#undef IMPL_EXPINSERT + +#undef FATAL_PIV + +#endif // MLIR_SPARSETENSOR_DEFINE_FUNCTIONS diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -15,6 +15,10 @@ //===----------------------------------------------------------------------===// #include "mlir/ExecutionEngine/SparseTensorUtils.h" +#include "mlir/ExecutionEngine/SparseTensor/COO.h" +#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" +#include "mlir/ExecutionEngine/SparseTensor/File.h" +#include "mlir/ExecutionEngine/SparseTensor/Storage.h" #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS @@ -30,6 +34,8 @@ #include #include +using namespace mlir::sparse_tensor; + //===----------------------------------------------------------------------===// // // Internal support for storing and reading sparse tensors. @@ -68,1363 +74,6 @@ namespace { -static constexpr int kColWidth = 1025; - -/// A version of `operator*` on `uint64_t` which checks for overflows. -static inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) { - assert((lhs == 0 || rhs <= std::numeric_limits::max() / lhs) && - "Integer overflow"); - return lhs * rhs; -} - -// This macro helps minimize repetition of this idiom, as well as ensuring -// we have some additional output indicating where the error is coming from. -// (Since `fprintf` doesn't provide a stacktrace, this helps make it easier -// to track down whether an error is coming from our code vs somewhere else -// in MLIR.) -#define FATAL(...) \ - do { \ - fprintf(stderr, "SparseTensorUtils: " __VA_ARGS__); \ - exit(1); \ - } while (0) - -// TODO: try to unify this with `SparseTensorFile::assertMatchesShape` -// which is used by `openSparseTensorCOO`. It's easy enough to resolve -// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier -// to resolve the presence/absence of `perm` (without introducing extra -// overhead), so perhaps the code duplication is unavoidable. -// -/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping -/// semantic-order to target-order) are a refinement of the desired `shape` -/// (in semantic-order). -/// -/// Precondition: `perm` and `shape` must be valid for `rank`. 
-static inline void -assertPermutedSizesMatchShape(const std::vector &dimSizes, - uint64_t rank, const uint64_t *perm, - const uint64_t *shape) { - assert(perm && shape); - assert(rank == dimSizes.size() && "Rank mismatch"); - for (uint64_t r = 0; r < rank; r++) - assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) && - "Dimension size mismatch"); -} - -/// A sparse tensor element in coordinate scheme (value and indices). -/// For example, a rank-1 vector element would look like -/// ({i}, a[i]) -/// and a rank-5 tensor element like -/// ({i,j,k,l,m}, a[i,j,k,l,m]) -/// We use pointer to a shared index pool rather than e.g. a direct -/// vector since that (1) reduces the per-element memory footprint, and -/// (2) centralizes the memory reservation and (re)allocation to one place. -template -struct Element final { - Element(uint64_t *ind, V val) : indices(ind), value(val){}; - uint64_t *indices; // pointer into shared index pool - V value; -}; - -/// The type of callback functions which receive an element. We avoid -/// packaging the coordinates and value together as an `Element` object -/// because this helps keep code somewhat cleaner. -template -using ElementConsumer = - const std::function &, V)> &; - -/// A memory-resident sparse tensor in coordinate scheme (collection of -/// elements). This data structure is used to read a sparse tensor from -/// any external format into memory and sort the elements lexicographically -/// by indices before passing it back to the client (most packed storage -/// formats require the elements to appear in lexicographic index order). -template -struct SparseTensorCOO final { -public: - SparseTensorCOO(const std::vector &dimSizes, uint64_t capacity) - : dimSizes(dimSizes) { - if (capacity) { - elements.reserve(capacity); - indices.reserve(capacity * getRank()); - } - } - - /// Adds element as indices and value. - void add(const std::vector &ind, V val) { - assert(!iteratorLocked && "Attempt to add() after startIterator()"); - uint64_t *base = indices.data(); - uint64_t size = indices.size(); - uint64_t rank = getRank(); - assert(ind.size() == rank && "Element rank mismatch"); - for (uint64_t r = 0; r < rank; r++) { - assert(ind[r] < dimSizes[r] && "Index is too large for the dimension"); - indices.push_back(ind[r]); - } - // This base only changes if indices were reallocated. In that case, we - // need to correct all previous pointers into the vector. Note that this - // only happens if we did not set the initial capacity right, and then only - // for every internal vector reallocation (which with the doubling rule - // should only incur an amortized linear overhead). - uint64_t *newBase = indices.data(); - if (newBase != base) { - for (uint64_t i = 0, n = elements.size(); i < n; i++) - elements[i].indices = newBase + (elements[i].indices - base); - base = newBase; - } - // Add element as (pointer into shared index pool, value) pair. - elements.emplace_back(base + size, val); - } - - /// Sorts elements lexicographically by index. - void sort() { - assert(!iteratorLocked && "Attempt to sort() after startIterator()"); - // TODO: we may want to cache an `isSorted` bit, to avoid - // unnecessary/redundant sorting. - uint64_t rank = getRank(); - std::sort(elements.begin(), elements.end(), - [rank](const Element &e1, const Element &e2) { - for (uint64_t r = 0; r < rank; r++) { - if (e1.indices[r] == e2.indices[r]) - continue; - return e1.indices[r] < e2.indices[r]; - } - return false; - }); - } - - /// Get the rank of the tensor. 
- uint64_t getRank() const { return dimSizes.size(); } - - /// Getter for the dimension-sizes array. - const std::vector &getDimSizes() const { return dimSizes; } - - /// Getter for the elements array. - const std::vector> &getElements() const { return elements; } - - /// Switch into iterator mode. - void startIterator() { - iteratorLocked = true; - iteratorPos = 0; - } - - /// Get the next element. - const Element *getNext() { - assert(iteratorLocked && "Attempt to getNext() before startIterator()"); - if (iteratorPos < elements.size()) - return &(elements[iteratorPos++]); - iteratorLocked = false; - return nullptr; - } - - /// Factory method. Permutes the original dimensions according to - /// the given ordering and expects subsequent add() calls to honor - /// that same ordering for the given indices. The result is a - /// fully permuted coordinate scheme. - /// - /// Precondition: `dimSizes` and `perm` must be valid for `rank`. - static SparseTensorCOO *newSparseTensorCOO(uint64_t rank, - const uint64_t *dimSizes, - const uint64_t *perm, - uint64_t capacity = 0) { - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; r++) { - assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = dimSizes[r]; - } - return new SparseTensorCOO(permsz, capacity); - } - -private: - const std::vector dimSizes; // per-dimension sizes - std::vector> elements; // all COO elements - std::vector indices; // shared index pool - bool iteratorLocked = false; - unsigned iteratorPos = 0; -}; - -// Forward. -template -class SparseTensorEnumeratorBase; - -// Helper macro for generating error messages when some -// `SparseTensorStorage` is cast to `SparseTensorStorageBase` -// and then the wrong "partial method specialization" is called. -#define FATAL_PIV(NAME) FATAL(" type mismatch for: " #NAME); - -/// Abstract base class for `SparseTensorStorage`. This class -/// takes responsibility for all the ``-independent aspects -/// of the tensor (e.g., shape, sparsity, permutation). In addition, -/// we use function overloading to implement "partial" method -/// specialization, which the C-API relies on to catch type errors -/// arising from our use of opaque pointers. -class SparseTensorStorageBase { -public: - /// Constructs a new storage object. The `perm` maps the tensor's - /// semantic-ordering of dimensions to this object's storage-order. - /// The `dimSizes` and `sparsity` arrays are already in storage-order. - /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorageBase(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity) - : dimSizes(dimSizes), rev(getRank()), - dimTypes(sparsity, sparsity + getRank()) { - assert(perm && sparsity); - const uint64_t rank = getRank(); - // Validate parameters. - assert(rank > 0 && "Trivial shape is unsupported"); - for (uint64_t r = 0; r < rank; r++) { - assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - assert((dimTypes[r] == DimLevelType::kDense || - dimTypes[r] == DimLevelType::kCompressed) && - "Unsupported DimLevelType"); - } - // Construct the "reverse" (i.e., inverse) permutation. - for (uint64_t r = 0; r < rank; r++) - rev[perm[r]] = r; - } - - virtual ~SparseTensorStorageBase() = default; - - /// Get the rank of the tensor. - uint64_t getRank() const { return dimSizes.size(); } - - /// Getter for the dimension-sizes array, in storage-order. 
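The "partial" method specialization trick described in the class comment above (and given out-of-line default definitions in the new Storage.cpp) is worth a concrete illustration. The sketch below uses invented Demo* names and is not the library's actual class hierarchy: the non-template base declares one virtual overload per supported type, each defaulting to a fatal error, and the template subclass overrides only the overload matching its own value type, so a mistyped call through an opaque base pointer fails loudly at runtime.

#include <cstdio>
#include <cstdlib>
#include <vector>

struct DemoStorageBase {
  virtual ~DemoStorageBase() = default;
  // One overload per supported value type; defaults report a type mismatch.
  virtual void getValues(std::vector<float> **) { fatal("getValuesF32"); }
  virtual void getValues(std::vector<double> **) { fatal("getValuesF64"); }

protected:
  static void fatal(const char *name) {
    std::fprintf(stderr, "type mismatch for: %s\n", name);
    std::exit(1);
  }
};

template <typename V>
struct DemoStorage final : public DemoStorageBase {
  // Overrides only the overload matching V; the other defaults remain.
  void getValues(std::vector<V> **out) final { *out = &values; }
  std::vector<V> values;
};

int main() {
  DemoStorage<double> tensor;
  DemoStorageBase *opaque = &tensor; // how the C API sees the object
  std::vector<double> *vals = nullptr;
  opaque->getValues(&vals); // OK: virtual dispatch reaches DemoStorage<double>
  // std::vector<float> *wrong = nullptr;
  // opaque->getValues(&wrong); // compiles, but would hit the fatal default
  return 0;
}

This mirrors how the FATAL_PIV defaults in Storage.cpp catch value-type mismatches arriving from the C API.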
- const std::vector &getDimSizes() const { return dimSizes; } - - /// Safely lookup the size of the given (storage-order) dimension. - uint64_t getDimSize(uint64_t d) const { - assert(d < getRank()); - return dimSizes[d]; - } - - /// Getter for the "reverse" permutation, which maps this object's - /// storage-order to the tensor's semantic-order. - const std::vector &getRev() const { return rev; } - - /// Getter for the dimension-types array, in storage-order. - const std::vector &getDimTypes() const { return dimTypes; } - - /// Safely check if the (storage-order) dimension uses compressed storage. - bool isCompressedDim(uint64_t d) const { - assert(d < getRank()); - return (dimTypes[d] == DimLevelType::kCompressed); - } - - /// Allocate a new enumerator. -#define DECL_NEWENUMERATOR(VNAME, V) \ - virtual void newEnumerator(SparseTensorEnumeratorBase **, uint64_t, \ - const uint64_t *) const { \ - FATAL_PIV("newEnumerator" #VNAME); \ - } - FOREVERY_V(DECL_NEWENUMERATOR) -#undef DECL_NEWENUMERATOR - - /// Overhead storage. -#define DECL_GETPOINTERS(PNAME, P) \ - virtual void getPointers(std::vector
<P>
**, uint64_t) { \ - FATAL_PIV("getPointers" #PNAME); \ - } - FOREVERY_FIXED_O(DECL_GETPOINTERS) -#undef DECL_GETPOINTERS -#define DECL_GETINDICES(INAME, I) \ - virtual void getIndices(std::vector **, uint64_t) { \ - FATAL_PIV("getIndices" #INAME); \ - } - FOREVERY_FIXED_O(DECL_GETINDICES) -#undef DECL_GETINDICES - - /// Primary storage. -#define DECL_GETVALUES(VNAME, V) \ - virtual void getValues(std::vector **) { FATAL_PIV("getValues" #VNAME); } - FOREVERY_V(DECL_GETVALUES) -#undef DECL_GETVALUES - - /// Element-wise insertion in lexicographic index order. -#define DECL_LEXINSERT(VNAME, V) \ - virtual void lexInsert(const uint64_t *, V) { FATAL_PIV("lexInsert" #VNAME); } - FOREVERY_V(DECL_LEXINSERT) -#undef DECL_LEXINSERT - - /// Expanded insertion. -#define DECL_EXPINSERT(VNAME, V) \ - virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t) { \ - FATAL_PIV("expInsert" #VNAME); \ - } - FOREVERY_V(DECL_EXPINSERT) -#undef DECL_EXPINSERT - - /// Finishes insertion. - virtual void endInsert() = 0; - -protected: - // Since this class is virtual, we must disallow public copying in - // order to avoid "slicing". Since this class has data members, - // that means making copying protected. - // - SparseTensorStorageBase(const SparseTensorStorageBase &) = default; - // Copy-assignment would be implicitly deleted (because `dimSizes` - // is const), so we explicitly delete it for clarity. - SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete; - -private: - const std::vector dimSizes; - std::vector rev; - const std::vector dimTypes; -}; - -#undef FATAL_PIV - -// Forward. -template -class SparseTensorEnumerator; - -/// A memory-resident sparse tensor using a storage scheme based on -/// per-dimension sparse/dense annotations. This data structure provides a -/// bufferized form of a sparse tensor type. In contrast to generating setup -/// methods for each differently annotated sparse tensor, this method provides -/// a convenient "one-size-fits-all" solution that simply takes an input tensor -/// and annotations to implement all required setup in a general manner. -template -class SparseTensorStorage final : public SparseTensorStorageBase { - /// Private constructor to share code between the other constructors. - /// Beware that the object is not necessarily guaranteed to be in a - /// valid state after this constructor alone; e.g., `isCompressedDim(d)` - /// doesn't entail `!(pointers[d].empty())`. - /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity) - : SparseTensorStorageBase(dimSizes, perm, sparsity), pointers(getRank()), - indices(getRank()), idx(getRank()) {} - -public: - /// Constructs a sparse tensor storage scheme with the given dimensions, - /// permutation, and per-dimension dense/sparse annotations, using - /// the coordinate scheme tensor for the initial contents if provided. - /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity, - SparseTensorCOO *coo) - : SparseTensorStorage(dimSizes, perm, sparsity) { - // Provide hints on capacity of pointers and indices. 
- // TODO: needs much fine-tuning based on actual sparsity; currently - // we reserve pointer/index space based on all previous dense - // dimensions, which works well up to first sparse dim; but - // we should really use nnz and dense/sparse distribution. - bool allDense = true; - uint64_t sz = 1; - for (uint64_t r = 0, rank = getRank(); r < rank; r++) { - if (isCompressedDim(r)) { - // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply - // `sz` by that before reserving. (For now we just use 1.) - pointers[r].reserve(sz + 1); - pointers[r].push_back(0); - indices[r].reserve(sz); - sz = 1; - allDense = false; - } else { // Dense dimension. - sz = checkedMul(sz, getDimSizes()[r]); - } - } - // Then assign contents from coordinate scheme tensor if provided. - if (coo) { - // Ensure both preconditions of `fromCOO`. - assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch"); - coo->sort(); - // Now actually insert the `elements`. - const std::vector> &elements = coo->getElements(); - uint64_t nnz = elements.size(); - values.reserve(nnz); - fromCOO(elements, 0, nnz, 0); - } else if (allDense) { - values.resize(sz, 0); - } - } - - /// Constructs a sparse tensor storage scheme with the given dimensions, - /// permutation, and per-dimension dense/sparse annotations, using - /// the given sparse tensor for the initial contents. - /// - /// Preconditions: - /// * `perm` and `sparsity` must be valid for `dimSizes.size()`. - /// * The `tensor` must have the same value type `V`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity, - const SparseTensorStorageBase &tensor); - - ~SparseTensorStorage() final = default; - - /// Partially specialize these getter methods based on template types. - void getPointers(std::vector
<P>
**out, uint64_t d) final { - assert(d < getRank()); - *out = &pointers[d]; - } - void getIndices(std::vector **out, uint64_t d) final { - assert(d < getRank()); - *out = &indices[d]; - } - void getValues(std::vector **out) final { *out = &values; } - - /// Partially specialize lexicographical insertions based on template types. - void lexInsert(const uint64_t *cursor, V val) final { - // First, wrap up pending insertion path. - uint64_t diff = 0; - uint64_t top = 0; - if (!values.empty()) { - diff = lexDiff(cursor); - endPath(diff + 1); - top = idx[diff] + 1; - } - // Then continue with insertion path. - insPath(cursor, diff, top, val); - } - - /// Partially specialize expanded insertions based on template types. - /// Note that this method resets the values/filled-switch array back - /// to all-zero/false while only iterating over the nonzero elements. - void expInsert(uint64_t *cursor, V *values, bool *filled, uint64_t *added, - uint64_t count) final { - if (count == 0) - return; - // Sort. - std::sort(added, added + count); - // Restore insertion path for first insert. - const uint64_t lastDim = getRank() - 1; - uint64_t index = added[0]; - cursor[lastDim] = index; - lexInsert(cursor, values[index]); - assert(filled[index]); - values[index] = 0; - filled[index] = false; - // Subsequent insertions are quick. - for (uint64_t i = 1; i < count; i++) { - assert(index < added[i] && "non-lexicographic insertion"); - index = added[i]; - cursor[lastDim] = index; - insPath(cursor, lastDim, added[i - 1] + 1, values[index]); - assert(filled[index]); - values[index] = 0; - filled[index] = false; - } - } - - /// Finalizes lexicographic insertions. - void endInsert() final { - if (values.empty()) - finalizeSegment(0); - else - endPath(0); - } - - void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t rank, - const uint64_t *perm) const final { - *out = new SparseTensorEnumerator(*this, rank, perm); - } - - /// Returns this sparse tensor storage scheme as a new memory-resident - /// sparse tensor in coordinate scheme with the given dimension order. - /// - /// Precondition: `perm` must be valid for `getRank()`. - SparseTensorCOO *toCOO(const uint64_t *perm) const { - SparseTensorEnumeratorBase *enumerator; - newEnumerator(&enumerator, getRank(), perm); - SparseTensorCOO *coo = - new SparseTensorCOO(enumerator->permutedSizes(), values.size()); - enumerator->forallElements([&coo](const std::vector &ind, V val) { - coo->add(ind, val); - }); - // TODO: This assertion assumes there are no stored zeros, - // or if there are then that we don't filter them out. - // Cf., - assert(coo->getElements().size() == values.size()); - delete enumerator; - return coo; - } - - /// Factory method. Constructs a sparse tensor storage scheme with the given - /// dimensions, permutation, and per-dimension dense/sparse annotations, - /// using the coordinate scheme tensor for the initial contents if provided. - /// In the latter case, the coordinate scheme must respect the same - /// permutation as is desired for the new sparse tensor storage. - /// - /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`. 
- static SparseTensorStorage * - newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo) { - SparseTensorStorage *n = nullptr; - if (coo) { - const auto &coosz = coo->getDimSizes(); - assertPermutedSizesMatchShape(coosz, rank, perm, shape); - n = new SparseTensorStorage(coosz, perm, sparsity, coo); - } else { - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; r++) { - assert(shape[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = shape[r]; - } - // We pass the null `coo` to ensure we select the intended constructor. - n = new SparseTensorStorage(permsz, perm, sparsity, coo); - } - return n; - } - - /// Factory method. Constructs a sparse tensor storage scheme with - /// the given dimensions, permutation, and per-dimension dense/sparse - /// annotations, using the sparse tensor for the initial contents. - /// - /// Preconditions: - /// * `shape`, `perm`, and `sparsity` must be valid for `rank`. - /// * The `tensor` must have the same value type `V`. - static SparseTensorStorage * - newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, - const SparseTensorStorageBase *source) { - assert(source && "Got nullptr for source"); - SparseTensorEnumeratorBase *enumerator; - source->newEnumerator(&enumerator, rank, perm); - const auto &permsz = enumerator->permutedSizes(); - assertPermutedSizesMatchShape(permsz, rank, perm, shape); - auto *tensor = - new SparseTensorStorage(permsz, perm, sparsity, *source); - delete enumerator; - return tensor; - } - -private: - /// Appends an arbitrary new position to `pointers[d]`. This method - /// checks that `pos` is representable in the `P` type; however, it - /// does not check that `pos` is semantically valid (i.e., larger than - /// the previous position and smaller than `indices[d].capacity()`). - void appendPointer(uint64_t d, uint64_t pos, uint64_t count = 1) { - assert(isCompressedDim(d)); - assert(pos <= std::numeric_limits
<P>
::max() && - "Pointer value is too large for the P-type"); - pointers[d].insert(pointers[d].end(), count, static_cast
<P>
(pos)); - } - - /// Appends index `i` to dimension `d`, in the semantically general - /// sense. For non-dense dimensions, that means appending to the - /// `indices[d]` array, checking that `i` is representable in the `I` - /// type; however, we do not verify other semantic requirements (e.g., - /// that `i` is in bounds for `dimSizes[d]`, and not previously occurring - /// in the same segment). For dense dimensions, this method instead - /// appends the appropriate number of zeros to the `values` array, - /// where `full` is the number of "entries" already written to `values` - /// for this segment (aka one after the highest index previously appended). - void appendIndex(uint64_t d, uint64_t full, uint64_t i) { - if (isCompressedDim(d)) { - assert(i <= std::numeric_limits::max() && - "Index value is too large for the I-type"); - indices[d].push_back(static_cast(i)); - } else { // Dense dimension. - assert(i >= full && "Index was already filled"); - if (i == full) - return; // Short-circuit, since it'll be a nop. - if (d + 1 == getRank()) - values.insert(values.end(), i - full, 0); - else - finalizeSegment(d + 1, 0, i - full); - } - } - - /// Writes the given coordinate to `indices[d][pos]`. This method - /// checks that `i` is representable in the `I` type; however, it - /// does not check that `i` is semantically valid (i.e., in bounds - /// for `dimSizes[d]` and not elsewhere occurring in the same segment). - void writeIndex(uint64_t d, uint64_t pos, uint64_t i) { - assert(isCompressedDim(d)); - // Subscript assignment to `std::vector` requires that the `pos`-th - // entry has been initialized; thus we must be sure to check `size()` - // here, instead of `capacity()` as would be ideal. - assert(pos < indices[d].size() && "Index position is out of bounds"); - assert(i <= std::numeric_limits::max() && - "Index value is too large for the I-type"); - indices[d][pos] = static_cast(i); - } - - /// Computes the assembled-size associated with the `d`-th dimension, - /// given the assembled-size associated with the `(d-1)`-th dimension. - /// "Assembled-sizes" correspond to the (nominal) sizes of overhead - /// storage, as opposed to "dimension-sizes" which are the cardinality - /// of coordinates for that dimension. - /// - /// Precondition: the `pointers[d]` array must be fully initialized - /// before calling this method. - uint64_t assembledSize(uint64_t parentSz, uint64_t d) const { - if (isCompressedDim(d)) - return pointers[d][parentSz]; - // else if dense: - return parentSz * getDimSizes()[d]; - } - - /// Initializes sparse tensor storage scheme from a memory-resident sparse - /// tensor in coordinate scheme. This method prepares the pointers and - /// indices arrays under the given per-dimension dense/sparse annotations. - /// - /// Preconditions: - /// (1) the `elements` must be lexicographically sorted. - /// (2) the indices of every element are valid for `dimSizes` (equal rank - /// and pointwise less-than). - void fromCOO(const std::vector> &elements, uint64_t lo, - uint64_t hi, uint64_t d) { - uint64_t rank = getRank(); - assert(d <= rank && hi <= elements.size()); - // Once dimensions are exhausted, insert the numerical values. - if (d == rank) { - assert(lo < hi); - values.push_back(elements[lo].value); - return; - } - // Visit all elements in this interval. - uint64_t full = 0; - while (lo < hi) { // If `hi` is unchanged, then `lo < elements.size()`. - // Find segment in interval with same index elements in this dimension. 
- uint64_t i = elements[lo].indices[d]; - uint64_t seg = lo + 1; - while (seg < hi && elements[seg].indices[d] == i) - seg++; - // Handle segment in interval for sparse or dense dimension. - appendIndex(d, full, i); - full = i + 1; - fromCOO(elements, lo, seg, d + 1); - // And move on to next segment in interval. - lo = seg; - } - // Finalize the sparse pointer structure at this dimension. - finalizeSegment(d, full); - } - - /// Finalize the sparse pointer structure at this dimension. - void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) { - if (count == 0) - return; // Short-circuit, since it'll be a nop. - if (isCompressedDim(d)) { - appendPointer(d, indices[d].size(), count); - } else { // Dense dimension. - const uint64_t sz = getDimSizes()[d]; - assert(sz >= full && "Segment is overfull"); - count = checkedMul(count, sz - full); - // For dense storage we must enumerate all the remaining coordinates - // in this dimension (i.e., coordinates after the last non-zero - // element), and either fill in their zero values or else recurse - // to finalize some deeper dimension. - if (d + 1 == getRank()) - values.insert(values.end(), count, 0); - else - finalizeSegment(d + 1, 0, count); - } - } - - /// Wraps up a single insertion path, inner to outer. - void endPath(uint64_t diff) { - uint64_t rank = getRank(); - assert(diff <= rank); - for (uint64_t i = 0; i < rank - diff; i++) { - const uint64_t d = rank - i - 1; - finalizeSegment(d, idx[d] + 1); - } - } - - /// Continues a single insertion path, outer to inner. - void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) { - uint64_t rank = getRank(); - assert(diff < rank); - for (uint64_t d = diff; d < rank; d++) { - uint64_t i = cursor[d]; - appendIndex(d, top, i); - top = 0; - idx[d] = i; - } - values.push_back(val); - } - - /// Finds the lexicographic differing dimension. - uint64_t lexDiff(const uint64_t *cursor) const { - for (uint64_t r = 0, rank = getRank(); r < rank; r++) - if (cursor[r] > idx[r]) - return r; - else - assert(cursor[r] == idx[r] && "non-lexicographic insertion"); - assert(0 && "duplication insertion"); - return -1u; - } - - // Allow `SparseTensorEnumerator` to access the data-members (to avoid - // the cost of virtual-function dispatch in inner loops), without - // making them public to other client code. - friend class SparseTensorEnumerator; - - std::vector> pointers; - std::vector> indices; - std::vector values; - std::vector idx; // index cursor for lexicographic insertion. -}; - -/// A (higher-order) function object for enumerating the elements of some -/// `SparseTensorStorage` under a permutation. That is, the `forallElements` -/// method encapsulates the loop-nest for enumerating the elements of -/// the source tensor (in whatever order is best for the source tensor), -/// and applies a permutation to the coordinates/indices before handing -/// each element to the callback. A single enumerator object can be -/// freely reused for several calls to `forallElements`, just so long -/// as each call is sequential with respect to one another. -/// -/// N.B., this class stores a reference to the `SparseTensorStorageBase` -/// passed to the constructor; thus, objects of this class must not -/// outlive the sparse tensor they depend on. 
-/// -/// Design Note: The reason we define this class instead of simply using -/// `SparseTensorEnumerator` is because we need to hide/generalize -/// the `` template parameters from MLIR client code (to simplify the -/// type parameters used for direct sparse-to-sparse conversion). And the -/// reason we define the `SparseTensorEnumerator` subclasses rather -/// than simply using this class, is to avoid the cost of virtual-method -/// dispatch within the loop-nest. -template -class SparseTensorEnumeratorBase { -public: - /// Constructs an enumerator with the given permutation for mapping - /// the semantic-ordering of dimensions to the desired target-ordering. - /// - /// Preconditions: - /// * the `tensor` must have the same `V` value type. - /// * `perm` must be valid for `rank`. - SparseTensorEnumeratorBase(const SparseTensorStorageBase &tensor, - uint64_t rank, const uint64_t *perm) - : src(tensor), permsz(src.getRev().size()), reord(getRank()), - cursor(getRank()) { - assert(perm && "Received nullptr for permutation"); - assert(rank == getRank() && "Permutation rank mismatch"); - const auto &rev = src.getRev(); // source-order -> semantic-order - const auto &dimSizes = src.getDimSizes(); // in source storage-order - for (uint64_t s = 0; s < rank; s++) { // `s` source storage-order - uint64_t t = perm[rev[s]]; // `t` target-order - reord[s] = t; - permsz[t] = dimSizes[s]; - } - } - - virtual ~SparseTensorEnumeratorBase() = default; - - // We disallow copying to help avoid leaking the `src` reference. - // (In addition to avoiding the problem of slicing.) - SparseTensorEnumeratorBase(const SparseTensorEnumeratorBase &) = delete; - SparseTensorEnumeratorBase & - operator=(const SparseTensorEnumeratorBase &) = delete; - - /// Returns the source/target tensor's rank. (The source-rank and - /// target-rank are always equal since we only support permutations. - /// Though once we add support for other dimension mappings, this - /// method will have to be split in two.) - uint64_t getRank() const { return permsz.size(); } - - /// Returns the target tensor's dimension sizes. - const std::vector &permutedSizes() const { return permsz; } - - /// Enumerates all elements of the source tensor, permutes their - /// indices, and passes the permuted element to the callback. - /// The callback must not store the cursor reference directly, - /// since this function reuses the storage. Instead, the callback - /// must copy it if they want to keep it. - virtual void forallElements(ElementConsumer yield) = 0; - -protected: - const SparseTensorStorageBase &src; - std::vector permsz; // in target order. - std::vector reord; // source storage-order -> target order. - std::vector cursor; // in target order. -}; - -template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { - using Base = SparseTensorEnumeratorBase; - -public: - /// Constructs an enumerator with the given permutation for mapping - /// the semantic-ordering of dimensions to the desired target-ordering. - /// - /// Precondition: `perm` must be valid for `rank`. - SparseTensorEnumerator(const SparseTensorStorage &tensor, - uint64_t rank, const uint64_t *perm) - : Base(tensor, rank, perm) {} - - ~SparseTensorEnumerator() final = default; - - void forallElements(ElementConsumer yield) final { - forallElements(yield, 0, 0); - } - -private: - /// The recursive component of the public `forallElements`. - void forallElements(ElementConsumer yield, uint64_t parentPos, - uint64_t d) { - // Recover the `` type parameters of `src`. 
- const auto &src = - static_cast &>(this->src); - if (d == Base::getRank()) { - assert(parentPos < src.values.size() && - "Value position is out of bounds"); - // TODO: - yield(this->cursor, src.values[parentPos]); - } else if (src.isCompressedDim(d)) { - // Look up the bounds of the `d`-level segment determined by the - // `d-1`-level position `parentPos`. - const std::vector
<P>
&pointersD = src.pointers[d]; - assert(parentPos + 1 < pointersD.size() && - "Parent pointer position is out of bounds"); - const uint64_t pstart = static_cast(pointersD[parentPos]); - const uint64_t pstop = static_cast(pointersD[parentPos + 1]); - // Loop-invariant code for looking up the `d`-level coordinates/indices. - const std::vector &indicesD = src.indices[d]; - assert(pstop <= indicesD.size() && "Index position is out of bounds"); - uint64_t &cursorReordD = this->cursor[this->reord[d]]; - for (uint64_t pos = pstart; pos < pstop; pos++) { - cursorReordD = static_cast(indicesD[pos]); - forallElements(yield, pos, d + 1); - } - } else { // Dense dimension. - const uint64_t sz = src.getDimSizes()[d]; - const uint64_t pstart = parentPos * sz; - uint64_t &cursorReordD = this->cursor[this->reord[d]]; - for (uint64_t i = 0; i < sz; i++) { - cursorReordD = i; - forallElements(yield, pstart + i, d + 1); - } - } - } -}; - -/// Statistics regarding the number of nonzero subtensors in -/// a source tensor, for direct sparse=>sparse conversion a la -/// . -/// -/// N.B., this class stores references to the parameters passed to -/// the constructor; thus, objects of this class must not outlive -/// those parameters. -class SparseTensorNNZ final { -public: - /// Allocate the statistics structure for the desired sizes and - /// sparsity (in the target tensor's storage-order). This constructor - /// does not actually populate the statistics, however; for that see - /// `initialize`. - /// - /// Precondition: `dimSizes` must not contain zeros. - SparseTensorNNZ(const std::vector &dimSizes, - const std::vector &sparsity) - : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) { - assert(dimSizes.size() == dimTypes.size() && "Rank mismatch"); - bool uncompressed = true; - (void)uncompressed; - uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`. - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { - switch (dimTypes[r]) { - case DimLevelType::kCompressed: - assert(uncompressed && - "Multiple compressed layers not currently supported"); - uncompressed = false; - nnz[r].resize(sz, 0); // Both allocate and zero-initialize. - break; - case DimLevelType::kDense: - assert(uncompressed && - "Dense after compressed not currently supported"); - break; - case DimLevelType::kSingleton: - // Singleton after Compressed causes no problems for allocating - // `nnz` nor for the yieldPos loop. This remains true even - // when adding support for multiple compressed dimensions or - // for dense-after-compressed. - break; - default: - FATAL("unsupported dimension level type"); - } - sz = checkedMul(sz, dimSizes[r]); - - } - } - - // We disallow copying to help avoid leaking the stored references. - SparseTensorNNZ(const SparseTensorNNZ &) = delete; - SparseTensorNNZ &operator=(const SparseTensorNNZ &) = delete; - - /// Returns the rank of the target tensor. - uint64_t getRank() const { return dimSizes.size(); } - - /// Enumerate the source tensor to fill in the statistics. The - /// enumerator should already incorporate the permutation (from - /// semantic-order to the target storage-order). - template - void initialize(SparseTensorEnumeratorBase &enumerator) { - assert(enumerator.getRank() == getRank() && "Tensor rank mismatch"); - assert(enumerator.permutedSizes() == dimSizes && "Tensor size mismatch"); - enumerator.forallElements( - [this](const std::vector &ind, V) { add(ind); }); - } - - /// The type of callback functions which receive an nnz-statistic. 
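// The callback type declared next is consumed as a running prefix sum:
// the storage constructor adds each per-prefix count onto the previous
// total and records that total as the next "pointers" entry of the
// compressed dimension.  A minimal sketch of that pattern, using a
// hypothetical helper over counts that have already been gathered:
//
//   static std::vector<uint64_t>
//   countsToPointers(const std::vector<uint64_t> &counts) {
//     std::vector<uint64_t> pointers;
//     pointers.reserve(counts.size() + 1);
//     pointers.push_back(0); // segment 0 starts at position 0
//     uint64_t pos = 0;
//     for (uint64_t n : counts) {
//       pos += n; // same accumulation the forallIndices callback performs
//       pointers.push_back(pos);
//     }
//     return pointers;
//   }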
- using NNZConsumer = const std::function &; - - /// Lexicographically enumerates all indicies for dimensions strictly - /// less than `stopDim`, and passes their nnz statistic to the callback. - /// Since our use-case only requires the statistic not the coordinates - /// themselves, we do not bother to construct those coordinates. - void forallIndices(uint64_t stopDim, NNZConsumer yield) const { - assert(stopDim < getRank() && "Stopping-dimension is out of bounds"); - assert(dimTypes[stopDim] == DimLevelType::kCompressed && - "Cannot look up non-compressed dimensions"); - forallIndices(yield, stopDim, 0, 0); - } - -private: - /// Adds a new element (i.e., increment its statistics). We use - /// a method rather than inlining into the lambda in `initialize`, - /// to avoid spurious templating over `V`. And this method is private - /// to avoid needing to re-assert validity of `ind` (which is guaranteed - /// by `forallElements`). - void add(const std::vector &ind) { - uint64_t parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { - if (dimTypes[r] == DimLevelType::kCompressed) - nnz[r][parentPos]++; - parentPos = parentPos * dimSizes[r] + ind[r]; - } - } - - /// Recursive component of the public `forallIndices`. - void forallIndices(NNZConsumer yield, uint64_t stopDim, uint64_t parentPos, - uint64_t d) const { - assert(d <= stopDim); - if (d == stopDim) { - assert(parentPos < nnz[d].size() && "Cursor is out of range"); - yield(nnz[d][parentPos]); - } else { - const uint64_t sz = dimSizes[d]; - const uint64_t pstart = parentPos * sz; - for (uint64_t i = 0; i < sz; i++) - forallIndices(yield, stopDim, pstart + i, d + 1); - } - } - - // All of these are in the target storage-order. - const std::vector &dimSizes; - const std::vector &dimTypes; - std::vector> nnz; -}; - -template -SparseTensorStorage::SparseTensorStorage( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity, const SparseTensorStorageBase &tensor) - : SparseTensorStorage(dimSizes, perm, sparsity) { - SparseTensorEnumeratorBase *enumerator; - tensor.newEnumerator(&enumerator, getRank(), perm); - { - // Initialize the statistics structure. - SparseTensorNNZ nnz(getDimSizes(), getDimTypes()); - nnz.initialize(*enumerator); - // Initialize "pointers" overhead (and allocate "indices", "values"). - uint64_t parentSz = 1; // assembled-size (not dimension-size) of `r-1`. - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { - if (isCompressedDim(r)) { - pointers[r].reserve(parentSz + 1); - pointers[r].push_back(0); - uint64_t currentPos = 0; - nnz.forallIndices(r, [this, ¤tPos, r](uint64_t n) { - currentPos += n; - appendPointer(r, currentPos); - }); - assert(pointers[r].size() == parentSz + 1 && - "Final pointers size doesn't match allocated size"); - // That assertion entails `assembledSize(parentSz, r)` - // is now in a valid state. That is, `pointers[r][parentSz]` - // equals the present value of `currentPos`, which is the - // correct assembled-size for `indices[r]`. - } - // Update assembled-size for the next iteration. - parentSz = assembledSize(parentSz, r); - // Ideally we need only `indices[r].reserve(parentSz)`, however - // the `std::vector` implementation forces us to initialize it too. - // That is, in the yieldPos loop we need random-access assignment - // to `indices[r]`; however, `std::vector`'s subscript-assignment - // only allows assigning to already-initialized positions. 
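// (Concretely: after `std::vector<I> v; v.reserve(n);` the expression
// `v[0] = x` is undefined behavior because no elements have been
// constructed, whereas after `v.resize(n)` all n positions are
// value-initialized and can be assigned through operator[].)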
- if (isCompressedDim(r)) - indices[r].resize(parentSz, 0); - } - values.resize(parentSz, 0); // Both allocate and zero-initialize. - } - // The yieldPos loop - enumerator->forallElements([this](const std::vector &ind, V val) { - uint64_t parentSz = 1, parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { - if (isCompressedDim(r)) { - // If `parentPos == parentSz` then it's valid as an array-lookup; - // however, it's semantically invalid here since that entry - // does not represent a segment of `indices[r]`. Moreover, that - // entry must be immutable for `assembledSize` to remain valid. - assert(parentPos < parentSz && "Pointers position is out of bounds"); - const uint64_t currentPos = pointers[r][parentPos]; - // This increment won't overflow the `P` type, since it can't - // exceed the original value of `pointers[r][parentPos+1]` - // which was already verified to be within bounds for `P` - // when it was written to the array. - pointers[r][parentPos]++; - writeIndex(r, currentPos, ind[r]); - parentPos = currentPos; - } else { // Dense dimension. - parentPos = parentPos * getDimSizes()[r] + ind[r]; - } - parentSz = assembledSize(parentSz, r); - } - assert(parentPos < values.size() && "Value position is out of bounds"); - values[parentPos] = val; - }); - // No longer need the enumerator, so we'll delete it ASAP. - delete enumerator; - // The finalizeYieldPos loop - for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; r++) { - if (isCompressedDim(r)) { - assert(parentSz == pointers[r].size() - 1 && - "Actual pointers size doesn't match the expected size"); - // Can't check all of them, but at least we can check the last one. - assert(pointers[r][parentSz - 1] == pointers[r][parentSz] && - "Pointers got corrupted"); - // TODO: optimize this by using `memmove` or similar. - for (uint64_t n = 0; n < parentSz; n++) { - const uint64_t parentPos = parentSz - n; - pointers[r][parentPos] = pointers[r][parentPos - 1]; - } - pointers[r][0] = 0; - } - parentSz = assembledSize(parentSz, r); - } -} - -/// Helper to convert string to lower case. -static char *toLower(char *token) { - for (char *c = token; *c; c++) - *c = tolower(*c); - return token; -} - -/// This class abstracts over the information stored in file headers, -/// as well as providing the buffers and methods for parsing those headers. -class SparseTensorFile final { -public: - enum class ValueKind { - kInvalid = 0, - kPattern = 1, - kReal = 2, - kInteger = 3, - kComplex = 4, - kUndefined = 5 - }; - - explicit SparseTensorFile(char *filename) : filename(filename) { - assert(filename && "Received nullptr for filename"); - } - - // Disallows copying, to avoid duplicating the `file` pointer. - SparseTensorFile(const SparseTensorFile &) = delete; - SparseTensorFile &operator=(const SparseTensorFile &) = delete; - - // This dtor tries to avoid leaking the `file`. (Though it's better - // to call `closeFile` explicitly when possible, since there are - // circumstances where dtors are not called reliably.) - ~SparseTensorFile() { closeFile(); } - - /// Opens the file for reading. - void openFile() { - if (file) - FATAL("Already opened file %s\n", filename); - file = fopen(filename, "r"); - if (!file) - FATAL("Cannot find file %s\n", filename); - } - - /// Closes the file. 
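// A minimal usage sketch for this reader, with a hypothetical caller
// that only needs the header metadata (fprintf/PRIu64 are available
// through the headers this file already requires):
//
//   static void printHeaderInfo(char *filename) {
//     SparseTensorFile stfile(filename);
//     stfile.openFile();
//     stfile.readHeader();
//     fprintf(stderr, "rank = %" PRIu64 ", nnz = %" PRIu64 "\n",
//             stfile.getRank(), stfile.getNNZ());
//     stfile.closeFile(); // the destructor would otherwise close it
//   }
//
// The closeFile() member defined next is what both that explicit call
// and the destructor above rely on.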
- void closeFile() { - if (file) { - fclose(file); - file = nullptr; - } - } - - // TODO(wrengr/bixia): figure out how to reorganize the element-parsing - // loop of `openSparseTensorCOO` into methods of this class, so we can - // avoid leaking access to the `line` pointer (both for general hygiene - // and because we can't mark it const due to the second argument of - // `strtoul`/`strtoud` being `char * *restrict` rather than - // `char const* *restrict`). - // - /// Attempts to read a line from the file. - char *readLine() { - if (fgets(line, kColWidth, file)) - return line; - FATAL("Cannot read next line of %s\n", filename); - } - - /// Reads and parses the file's header. - void readHeader() { - assert(file && "Attempt to readHeader() before openFile()"); - if (strstr(filename, ".mtx")) - readMMEHeader(); - else if (strstr(filename, ".tns")) - readExtFROSTTHeader(); - else - FATAL("Unknown format %s\n", filename); - assert(isValid() && "Failed to read the header"); - } - - ValueKind getValueKind() const { return valueKind_; } - - bool isValid() const { return valueKind_ != ValueKind::kInvalid; } - - /// Gets the MME "pattern" property setting. Is only valid after - /// parsing the header. - bool isPattern() const { - assert(isValid() && "Attempt to isPattern() before readHeader()"); - return valueKind_ == ValueKind::kPattern; - } - - /// Gets the MME "symmetric" property setting. Is only valid after - /// parsing the header. - bool isSymmetric() const { - assert(isValid() && "Attempt to isSymmetric() before readHeader()"); - return isSymmetric_; - } - - /// Gets the rank of the tensor. Is only valid after parsing the header. - uint64_t getRank() const { - assert(isValid() && "Attempt to getRank() before readHeader()"); - return idata[0]; - } - - /// Gets the number of non-zeros. Is only valid after parsing the header. - uint64_t getNNZ() const { - assert(isValid() && "Attempt to getNNZ() before readHeader()"); - return idata[1]; - } - - /// Gets the dimension-sizes array. The pointer itself is always - /// valid; however, the values stored therein are only valid after - /// parsing the header. - const uint64_t *getDimSizes() const { return idata + 2; } - - /// Safely gets the size of the given dimension. Is only valid - /// after parsing the header. - uint64_t getDimSize(uint64_t d) const { - assert(d < getRank()); - return idata[2 + d]; - } - - /// Asserts the shape subsumes the actual dimension sizes. Is only - /// valid after parsing the header. - void assertMatchesShape(uint64_t rank, const uint64_t *shape) const { - assert(rank == getRank() && "Rank mismatch"); - for (uint64_t r = 0; r < rank; r++) - assert((shape[r] == 0 || shape[r] == idata[2 + r]) && - "Dimension size mismatch"); - } - -private: - void readMMEHeader(); - void readExtFROSTTHeader(); - - const char *filename; - FILE *file = nullptr; - ValueKind valueKind_ = ValueKind::kInvalid; - bool isSymmetric_ = false; - uint64_t idata[512]; - char line[kColWidth]; -}; - -/// Read the MME header of a general sparse matrix of type real. -void SparseTensorFile::readMMEHeader() { - char header[64]; - char object[64]; - char format[64]; - char field[64]; - char symmetry[64]; - // Read header line. - if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, - symmetry) != 5) - FATAL("Corrupt header in %s\n", filename); - // Process `field`, which specify pattern or the data type of the values. 
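// For reference, a typical banner accepted by the checks further below is
//   %%MatrixMarket matrix coordinate real general
// i.e. header, object, format, field, and symmetry tokens; the `field`
// token handled next selects the ValueKind, and `symmetry` may instead
// be "symmetric".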
- if (strcmp(toLower(field), "pattern") == 0) - valueKind_ = ValueKind::kPattern; - else if (strcmp(toLower(field), "real") == 0) - valueKind_ = ValueKind::kReal; - else if (strcmp(toLower(field), "integer") == 0) - valueKind_ = ValueKind::kInteger; - else if (strcmp(toLower(field), "complex") == 0) - valueKind_ = ValueKind::kComplex; - else - FATAL("Unexpected header field value in %s\n", filename); - - // Set properties. - isSymmetric_ = (strcmp(toLower(symmetry), "symmetric") == 0); - // Make sure this is a general sparse matrix. - if (strcmp(toLower(header), "%%matrixmarket") || - strcmp(toLower(object), "matrix") || - strcmp(toLower(format), "coordinate") || - (strcmp(toLower(symmetry), "general") && !isSymmetric_)) - FATAL("Cannot find a general sparse matrix in %s\n", filename); - // Skip comments. - while (true) { - readLine(); - if (line[0] != '%') - break; - } - // Next line contains M N NNZ. - idata[0] = 2; // rank - if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, - idata + 1) != 3) - FATAL("Cannot find size in %s\n", filename); -} - -/// Read the "extended" FROSTT header. Although not part of the documented -/// format, we assume that the file starts with optional comments followed -/// by two lines that define the rank, the number of nonzeros, and the -/// dimensions sizes (one per rank) of the sparse tensor. -void SparseTensorFile::readExtFROSTTHeader() { - // Skip comments. - while (true) { - readLine(); - if (line[0] != '#') - break; - } - // Next line contains RANK and NNZ. - if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) - FATAL("Cannot find metadata in %s\n", filename); - // Followed by a line with the dimension sizes (one per rank). - for (uint64_t r = 0; r < idata[0]; r++) - if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) - FATAL("Cannot find dimension size %s\n", filename); - readLine(); // end of line - // The FROSTT format does not define the data type of the nonzero elements. - valueKind_ = ValueKind::kUndefined; -} - -// Adds a value to a tensor in coordinate scheme. If is_symmetric_value is true, -// also adds the value to its symmetric location. -template -static inline void addValue(T *coo, V value, - const std::vector indices, - bool is_symmetric_value) { - // TODO: - coo->add(indices, value); - // We currently chose to deal with symmetric matrices by fully constructing - // them. In the future, we may want to make symmetry implicit for storage - // reasons. - if (is_symmetric_value) - coo->add({indices[1], indices[0]}, value); -} - -// Reads an element of a complex type for the current indices in coordinate -// scheme. -template -static inline void readCOOValue(SparseTensorCOO> *coo, - const std::vector indices, - char **linePtr, bool is_pattern, - bool add_symmetric_value) { - // Read two values to make a complex. The external formats always store - // numerical values with the type double, but we cast these values to the - // sparse tensor object type. For a pattern tensor, we arbitrarily pick the - // value 1 for all entries. - V re = is_pattern ? 1.0 : strtod(*linePtr, linePtr); - V im = is_pattern ? 1.0 : strtod(*linePtr, linePtr); - std::complex value = {re, im}; - addValue(coo, value, indices, add_symmetric_value); -} - -// Reads an element of a non-complex type for the current indices in coordinate -// scheme. 
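// Both overloads follow the same per-line parsing convention: the indices
// are consumed with strtoul (in openSparseTensorCOO below) and the value
// with strtod (here), each call advancing the same cursor.  A standalone
// sketch for one rank-2 line such as "3 7 1.5\n" (hypothetical helper,
// 1-based indices as in the external formats):
//
//   static double parseRank2Line(char *linePtr, uint64_t &i, uint64_t &j) {
//     i = strtoul(linePtr, &linePtr, 10) - 1; // convert to 0-based
//     j = strtoul(linePtr, &linePtr, 10) - 1;
//     return strtod(linePtr, &linePtr);
//   }
//
// The non-complex overload below applies the same idea for the value
// part only, with SFINAE excluding complex element types.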
-template , V>::value && - !std::is_same, V>::value>::type * = nullptr> -static void inline readCOOValue(SparseTensorCOO *coo, - const std::vector indices, - char **linePtr, bool is_pattern, - bool is_symmetric_value) { - // The external formats always store these numerical values with the type - // double, but we cast these values to the sparse tensor object type. - // For a pattern tensor, we arbitrarily pick the value 1 for all entries. - double value = is_pattern ? 1.0 : strtod(*linePtr, linePtr); - addValue(coo, value, indices, is_symmetric_value); -} - -/// Reads a sparse tensor with the given filename into a memory-resident -/// sparse tensor in coordinate scheme. -template -static SparseTensorCOO * -openSparseTensorCOO(char *filename, uint64_t rank, const uint64_t *shape, - const uint64_t *perm, PrimaryType valTp) { - SparseTensorFile stfile(filename); - stfile.openFile(); - stfile.readHeader(); - // Check tensor element type against the value type in the input file. - SparseTensorFile::ValueKind valueKind = stfile.getValueKind(); - bool tensorIsInteger = - (valTp >= PrimaryType::kI64 && valTp <= PrimaryType::kI8); - bool tensorIsReal = (valTp >= PrimaryType::kF64 && valTp <= PrimaryType::kI8); - if ((valueKind == SparseTensorFile::ValueKind::kReal && tensorIsInteger) || - (valueKind == SparseTensorFile::ValueKind::kComplex && tensorIsReal)) { - FATAL("Tensor element type %d not compatible with values in file %s\n", - static_cast(valTp), filename); - } - stfile.assertMatchesShape(rank, shape); - // Prepare sparse tensor object with per-dimension sizes - // and the number of nonzeros as initial capacity. - uint64_t nnz = stfile.getNNZ(); - auto *coo = SparseTensorCOO::newSparseTensorCOO(rank, stfile.getDimSizes(), - perm, nnz); - // Read all nonzero elements. - std::vector indices(rank); - for (uint64_t k = 0; k < nnz; k++) { - char *linePtr = stfile.readLine(); - for (uint64_t r = 0; r < rank; r++) { - uint64_t idx = strtoul(linePtr, &linePtr, 10); - // Add 0-based index. - indices[perm[r]] = idx - 1; - } - readCOOValue(coo, indices, &linePtr, stfile.isPattern(), - stfile.isSymmetric() && indices[0] != indices[1]); - } - // Close the file and return tensor. - stfile.closeFile(); - return coo; -} - -/// Writes the sparse tensor to `dest` in extended FROSTT format. -template -static void outSparseTensor(void *tensor, void *dest, bool sort) { - assert(tensor && dest); - auto coo = static_cast *>(tensor); - if (sort) - coo->sort(); - char *filename = static_cast(dest); - auto &dimSizes = coo->getDimSizes(); - auto &elements = coo->getElements(); - uint64_t rank = coo->getRank(); - uint64_t nnz = elements.size(); - std::fstream file; - file.open(filename, std::ios_base::out | std::ios_base::trunc); - assert(file.is_open()); - file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl; - for (uint64_t r = 0; r < rank - 1; r++) - file << dimSizes[r] << " "; - file << dimSizes[rank - 1] << std::endl; - for (uint64_t i = 0; i < nnz; i++) { - auto &idx = elements[i].indices; - for (uint64_t r = 0; r < rank; r++) - file << (idx[r] + 1) << " "; - file << elements[i].value << std::endl; - } - file.flush(); - file.close(); - assert(file.good()); -} - /// Initializes sparse tensor from an external COO-flavored format. 
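// In the hunk that follows, the only change to this initializer is the
// renamed fatal-error macro; the sanity check it keeps, that `perm` is a
// genuine permutation of 0..rank-1, is equivalent to this standalone
// predicate (hypothetical helper):
//
//   static bool isPermutation(const uint64_t *perm, uint64_t rank) {
//     std::vector<uint64_t> order(perm, perm + rank);
//     std::sort(order.begin(), order.end());
//     for (uint64_t i = 0; i < rank; ++i)
//       if (order[i] != i)
//         return false;
//     return true;
//   }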
template static SparseTensorStorage * @@ -1437,13 +86,14 @@ std::sort(order.begin(), order.end()); for (uint64_t i = 0; i < rank; ++i) if (i != order[i]) - FATAL("Not a permutation of 0..%" PRIu64 "\n", rank); + MLIR_SPARSETENSOR_FATAL("Not a permutation of 0..%" PRIu64 "\n", rank); // Verify that the sparsity values are supported. for (uint64_t i = 0; i < rank; ++i) if (sparsity[i] != DimLevelType::kDense && sparsity[i] != DimLevelType::kCompressed) - FATAL("Unsupported sparsity value %d\n", static_cast(sparsity[i])); + MLIR_SPARSETENSOR_FATAL("Unsupported sparsity value %d\n", + static_cast(sparsity[i])); #endif // Convert external format to internal COO. @@ -1677,9 +327,10 @@ // Unsupported case (add above if needed). // TODO: better pretty-printing of enum values! - FATAL("unsupported combination of types: \n", - static_cast(ptrTp), static_cast(indTp), - static_cast(valTp)); + MLIR_SPARSETENSOR_FATAL( + "unsupported combination of types: \n", + static_cast(ptrTp), static_cast(indTp), + static_cast(valTp)); } #undef CASE #undef CASE_SECSAME @@ -1834,7 +485,7 @@ sprintf(var, "TENSOR%" PRIu64, id); char *env = getenv(var); if (!env) - FATAL("Environment variable %s is not set\n", var); + MLIR_SPARSETENSOR_FATAL("Environment variable %s is not set\n", var); return env; } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2064,7 +2064,6 @@ hdrs = [ "include/mlir/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.h", "include/mlir/Dialect/SparseTensor/Transforms/Passes.h", - "include/mlir/ExecutionEngine/SparseTensorUtils.h", ], includes = ["include"], deps = [ @@ -2091,7 +2090,7 @@ ":TensorDialect", ":Transforms", ":VectorDialect", - ":mlir_c_runner_utils", + ":mlir_sparse_tensor_utils", "//llvm:Support", ], ) @@ -6547,20 +6546,71 @@ deps = [":mlir_async_runtime"], ) +cc_library( + name = "_mlir_float16_utils", + srcs = ["lib/ExecutionEngine/Float16bits.cpp"], + hdrs = ["include/mlir/ExecutionEngine/Float16bits.h"], + copts = ["-Dmlir_float16_utils_EXPORTS"], + includes = ["include"], +) + +# Indirection to avoid 'libmlir_float16_utils.so' filename clash. +alias( + name = "mlir_float16_utils", + actual = "_mlir_float16_utils", +) + +cc_binary( + name = "libmlir_float16_utils.so", + linkshared = True, + linkstatic = False, + deps = [":mlir_float16_utils"], +) + +# Unlike mlir_float16_utils, mlir_c_runner_utils, etc, we do *not* make +# this a shared library: because on the CMake side, doing so causes +# issues when building on Windows. +# +# We relist Float16bits.h because Enums.h includes it; rather than +# forcing all direct-dependants state that they also directly-depend +# on :mlir_float16_utils (to satisfy the layering_check). 
+cc_library( + name = "mlir_sparse_tensor_utils", + srcs = [ + "lib/ExecutionEngine/SparseTensor/File.cpp", + "lib/ExecutionEngine/SparseTensor/NNZ.cpp", + "lib/ExecutionEngine/SparseTensor/Storage.cpp", + ], + hdrs = [ + "include/mlir/ExecutionEngine/Float16bits.h", + "include/mlir/ExecutionEngine/SparseTensor/COO.h", + "include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h", + "include/mlir/ExecutionEngine/SparseTensor/Enums.h", + "include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h", + "include/mlir/ExecutionEngine/SparseTensor/File.h", + "include/mlir/ExecutionEngine/SparseTensor/Storage.h", + ], + copts = ["-Dmlir_sparse_tensor_utils_EXPORTS"], + includes = ["include"], + deps = [":mlir_float16_utils"], +) + +# We relist Enums.h because SparseTensorUtils.h includes/reexports it +# as part of the public API. cc_library( name = "_mlir_c_runner_utils", srcs = [ "lib/ExecutionEngine/CRunnerUtils.cpp", - "lib/ExecutionEngine/Float16bits.cpp", "lib/ExecutionEngine/SparseTensorUtils.cpp", ], hdrs = [ "include/mlir/ExecutionEngine/CRunnerUtils.h", - "include/mlir/ExecutionEngine/Float16bits.h", "include/mlir/ExecutionEngine/Msan.h", + "include/mlir/ExecutionEngine/SparseTensor/Enums.h", "include/mlir/ExecutionEngine/SparseTensorUtils.h", ], includes = ["include"], + deps = [":mlir_sparse_tensor_utils"], ) # Indirection to avoid 'libmlir_c_runner_utils.so' filename clash.