diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Attributes.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Attributes.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Attributes.h @@ -0,0 +1,75 @@ +//===- Attributes.h - C++ attributes for SparseTensorRuntime ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines various macros for using C++ attributes whenever +// they're supported by the compiler. These macros are the same as the +// versions in the LLVMSupport library, but we define our own versions +// in order to avoid introducing that dependency just for the sake of +// these macros. (If we ever do end up depending on LLVMSupport, then +// we should remove this header and use "llvm/Support/Compiler.h" instead.) +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_ATTRIBUTES_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_ATTRIBUTES_H + +// A wrapper around `__has_cpp_attribute` for C++11 style attributes, +// which are defined by ISO C++ SD-6. +// +#if defined(__cplusplus) && defined(__has_cpp_attribute) +// NOTE: The __cplusplus requirement should be unnecessary, but guards +// against issues with GCC . +#define MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +#define MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +// A wrapper around `__has_attribute`, which is defined by GCC 5+ and Clang. +// GCC: +// Clang: +#ifdef __has_attribute +#define MLIR_SPARSETENSOR_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define MLIR_SPARSETENSOR_HAS_ATTRIBUTE(x) 0 +#endif + +// An attribute for non-owning classes (like `PermutationRef`) to enable +// lifetime warnings. +#if MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(gsl::Pointer) +#define MLIR_SPARSETENSOR_GSL_POINTER [[gsl::Pointer]] +#else +#define MLIR_SPARSETENSOR_GSL_POINTER +#endif + +// An attribute for functions which are "pure" in the following sense: +// * the result depends only on the function arguments +// * pointer arguments are allowed to be read from, but not written to +// * has no observable effects other than the return value +// +// This allows the compiler to avoid repeated function calls whenever +// it can determine that the arguments are the same and that memory has +// not changed. +// +// This macro is called `LLVM_READONLY` by LLVMSupport. This definition +// differs slightly from the LLVM version by using `gnu::pure` when +// available, like Abseil's `ABSL_ATTRIBUTE_PURE_FUNCTION`. +#if MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(gnu::pure) +#define MLIR_SPARSETENSOR_PURE [[gnu::pure]] +#elif MLIR_SPARSETENSOR_HAS_ATTRIBUTE(pure) || defined(__GNUC__) +#define MLIR_SPARSETENSOR_PURE __attribute__((pure)) +#else +#define MLIR_SPARSETENSOR_PURE +#endif + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_ATTRIBUTES_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h @@ -17,6 +17,8 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" + #include #include #include @@ -102,34 +104,40 @@ using difference_type = typename vector_type::difference_type; using size_type = typename vector_type::size_type; - SparseTensorCOO(const std::vector &dimSizes, uint64_t capacity) - : dimSizes(dimSizes), isSorted(true) { + /// Constructs a new coordinate-scheme sparse tensor with the given + /// sizes and initial storage capacity. + /// + /// Asserts: + /// * `dimSizes` has nonzero size. + /// * the elements of `dimSizes` are nonzero. + explicit SparseTensorCOO(const std::vector &dimSizes, + uint64_t capacity = 0) + : SparseTensorCOO(dimSizes.size(), dimSizes.data(), capacity) {} + + // TODO: make a class for capturing known-valid sizes (a la PermutationRef), + // so that `SparseTensorStorage::toCOO` can avoid redoing these assertions. + // Also so that we can enforce the asserts *before* copying into `dimSizes`. + // + /// Constructs a new coordinate-scheme sparse tensor with the given + /// sizes and initial storage capacity. + /// + /// Precondition: `dimSizes` must be valid for `dimRank`. + /// + /// Asserts: + /// * `dimRank` is nonzero. + /// * the elements of `dimSizes` are nonzero. + explicit SparseTensorCOO(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t capacity = 0) + : dimSizes(dimSizes, dimSizes + dimRank), isSorted(true) { + assert(dimRank > 0 && "Trivial shape is not supported"); + for (uint64_t d = 0; d < dimRank; ++d) + assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage"); if (capacity) { elements.reserve(capacity); - indices.reserve(capacity * getRank()); + indices.reserve(capacity * dimRank); } } - /// Factory method. Permutes the original dimensions according to - /// the given ordering and expects subsequent add() calls to honor - /// that same ordering for the given indices. The result is a - /// fully permuted coordinate scheme. - /// - /// Precondition: `dimSizes` and `perm` must be valid for `rank`. - /// - /// Asserts: the elements of `dimSizes` are non-zero. - static SparseTensorCOO *newSparseTensorCOO(uint64_t rank, - const uint64_t *dimSizes, - const uint64_t *perm, - uint64_t capacity = 0) { - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; ++r) { - assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = dimSizes[r]; - } - return new SparseTensorCOO(permsz, capacity); - } - /// Gets the rank of the tensor. uint64_t getRank() const { return dimSizes.size(); } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -26,6 +26,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -113,9 +114,6 @@ /// Closes the file. void closeFile(); - /// Attempts to read a line from the file. - char *readLine(); - /// Reads and parses the file's header. void readHeader(); @@ -174,28 +172,23 @@ /// returns the value of the element. Stores the coordinates of the element /// to the `indices` array. template - V readCOOElement(uint64_t rank, uint64_t *indices, - const uint64_t *perm = nullptr) { - assert(rank == getRank() && "Rank mismatch"); - char *linePtr = readLine(); - if (perm) - for (uint64_t r = 0; r < rank; ++r) { - // Parse the 1-based index. - uint64_t idx = strtoul(linePtr, &linePtr, 10); - // Store the 0-based index. - indices[perm[r]] = idx - 1; - } - else - for (uint64_t r = 0; r < rank; ++r) { - // Parse the 1-based index. - uint64_t idx = strtoul(linePtr, &linePtr, 10); - // Store the 0-based index. - indices[r] = idx - 1; - } + V readCOOElement(uint64_t rank, uint64_t *indices) { + char *linePtr = readCOOIndices(rank, indices); return detail::readCOOValue(&linePtr, isPattern()); } private: + /// Attempts to read a line from the file. Is private because there's + /// no reason for client code to call it. + void readLine(); + + /// Reads the next line of the input file and parses the coordinates + /// into the `indices` argument. Returns the position in the `line` + /// buffer where the element's value should be parsed from. This method + /// has been factored out from `readCOOElement` to minimize code bloat + /// for the generated library. + char *readCOOIndices(uint64_t rank, uint64_t *indices); + /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -217,40 +210,71 @@ //===----------------------------------------------------------------------===// /// Reads a sparse tensor with the given filename into a memory-resident -/// sparse tensor in coordinate scheme. -template -inline SparseTensorCOO * -openSparseTensorCOO(const char *filename, uint64_t rank, const uint64_t *shape, - const uint64_t *perm, PrimaryType valTp) { +/// sparse tensor. +/// +/// Preconditions: +/// * `dimShape` and `dim2lvl` must be valid for `dimRank`. +/// * `lvlTypes` and `lvl2dim` must be valid for `lvlRank`. +/// * `dim2lvl` is the inverse of `lvl2dim`. +/// +/// Asserts: +/// * the file's actual value type can be read as `valTp`. +/// * the file's actual dimension-sizes match the expected `dimShape`. +/// * `dim2lvl` is a permutation, and therefore also `dimRank == lvlRank`. +// +// TODO: As currently written, this function uses `dim2lvl` in two +// places: first, to construct the level-sizes from the file's actual +// dimension-sizes; and second, to map the file's dimension-indices into +// level-indices. The latter can easily generalize to arbitrary mappings, +// however the former cannot. Thus, once we functionalize the mappings, +// this function will need both the sizes-to-sizes and indices-to-indices +// variants of the `dim2lvl` mapping. For the `lvl2dim` direction we only +// need the indices-to-indices variant, for handing off to `newFromCOO`. +template +inline SparseTensorStorage * +openSparseTensor(uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + const uint64_t *dim2lvl, const char *filename, + PrimaryType valTp) { + // Read the file's header and check the file's actual element type and + // dimension-sizes against the expected element type and dimension-shape. SparseTensorReader stfile(filename); stfile.openFile(); stfile.readHeader(); - // Check tensor element type against the value type in the input file. if (!stfile.canReadAs(valTp)) MLIR_SPARSETENSOR_FATAL( "Tensor element type %d not compatible with values in file %s\n", static_cast(valTp), filename); - stfile.assertMatchesShape(rank, shape); - // Prepare sparse tensor object with per-dimension sizes - // and the number of nonzeros as initial capacity. + stfile.assertMatchesShape(dimRank, dimShape); + const uint64_t *dimSizes = stfile.getDimSizes(); + // Construct the level-sizes from the file's dimension-sizes + // TODO: This doesn't generalize to arbitrary mappings. (See above.) + assert(dimRank == lvlRank && "Rank mismatch"); + detail::PermutationRef d2l(dimRank, dim2lvl); + std::vector lvlSizes = d2l.pushforward(dimRank, dimSizes); + // Prepare a COO object with the number of nonzeros as initial capacity. uint64_t nnz = stfile.getNNZ(); - auto *coo = SparseTensorCOO::newSparseTensorCOO(rank, stfile.getDimSizes(), - perm, nnz); + auto *lvlCOO = new SparseTensorCOO(lvlSizes, nnz); // Read all nonzero elements. - std::vector indices(rank); + std::vector dimInd(dimRank); + std::vector lvlInd(lvlRank); for (uint64_t k = 0; k < nnz; ++k) { - const V value = stfile.readCOOElement(rank, indices.data(), perm); + const V value = stfile.readCOOElement(dimRank, dimInd.data()); + d2l.pushforward(dimRank, dimInd.data(), lvlInd.data()); // TODO: - coo->add(indices, value); + lvlCOO->add(lvlInd, value); // We currently chose to deal with symmetric matrices by fully // constructing them. In the future, we may want to make symmetry // implicit for storage reasons. - if (stfile.isSymmetric() && indices[0] != indices[1]) - coo->add({indices[1], indices[0]}, value); + if (stfile.isSymmetric() && lvlInd[0] != lvlInd[1]) + lvlCOO->add({lvlInd[1], lvlInd[0]}, value); } - // Close the file and return tensor. + // Close the file, convert the COO to SparseTensorStorage, and return. stfile.closeFile(); - return coo; + auto *tensor = SparseTensorStorage::newFromCOO( + dimRank, dimSizes, lvlRank, lvlTypes, lvl2dim, *lvlCOO); + delete lvlCOO; + return tensor; } /// Writes the sparse tensor to `filename` in extended FROSTT format. diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h @@ -0,0 +1,142 @@ +//===- PermutationRef.h - Permutation reference wrapper ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header is not part of the public API. It is placed in the +// includes directory only because that's required by the implementations +// of template-classes. +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_PERMUTATIONREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_PERMUTATIONREF_H + +#include "mlir/ExecutionEngine/SparseTensor/Attributes.h" +#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" + +#include +#include +#include + +namespace mlir { +namespace sparse_tensor { +namespace detail { + +/// Checks whether the `perm` array is a permutation of `[0 .. size)`. +MLIR_SPARSETENSOR_PURE bool isPermutation(uint64_t size, const uint64_t *perm); + +/// Wrapper around `isPermutation` to ensure consistent error messages. +inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { +#ifndef NDEBUG + if (!isPermutation(size, perm)) + MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); +#endif +} + +// TODO: To implement things like `inverse` and `compose` while preserving +// the knowledge that `isPermutation` is true, we'll need to also have +// an owning version of `PermutationRef`. (Though ideally we'll really +// want to defunctionalize those methods so that we can avoid intermediate +// arrays/copies and only materialize the data on request.) + +/// A non-owning class for capturing the knowledge that `isPermutation` +/// is true, to avoid needing to assert it repeatedly. +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] PermutationRef final { +public: + /// Asserts `isPermutation` and returns the witness to that being true. + // + // TODO: For now the assertive ctor is sufficient, but in principle + // we'll want a factory that can optionally construct the object + // (so callers can handle errors themselves). + explicit PermutationRef(uint64_t size, const uint64_t *perm) + : permSize(size), perm(perm) { + assertIsPermutation(size, perm); + } + + uint64_t size() const { return permSize; } + + const uint64_t *data() const { return perm; } + + const uint64_t &operator[](uint64_t i) const { + assert(i < permSize && "index is out of bounds"); + return perm[i]; + } + + /// Constructs a pushforward array of values. This method is the inverse + /// of `permute` in the sense that for all `p` and `xs` we have: + /// * `p.permute(p.pushforward(xs)) == xs` + /// * `p.pushforward(p.permute(xs)) == xs` + template + inline std::vector pushforward(const std::vector &values) const { + return pushforward(values.size(), values.data()); + } + + template + inline std::vector pushforward(uint64_t size, const T *values) const { + std::vector out(permSize); + pushforward(size, values, out.data()); + return out; + } + + // NOTE: This form of the method is required by `toMLIRSparseTensor`, + // so it can reuse the `out` buffer for each iteration of a loop. + template + inline void pushforward(uint64_t size, const T *values, T *out) const { + assert(size == permSize && "size mismatch"); + for (uint64_t i = 0; i < permSize; ++i) + out[perm[i]] = values[i]; + } + + // NOTE: this is only needed by `toMLIRSparseTensor`, which in + // turn only needs it as a vector to hand off to `newSparseTensor`. + // Otherwise we would want the result to be an owning-permutation, + // to retain the knowledge that `isPermutation` is true. + // + /// Constructs the inverse permutation. This is equivalent to calling + /// `pushforward` with `std::iota` for the values. + std::vector inverse() const; + + /// Constructs a permuted array of values. This method is the inverse + /// of `pushforward` in the sense that for all `p` and `xs` we have: + /// * `p.permute(p.pushforward(xs)) == xs` + /// * `p.pushforward(p.permute(xs)) == xs` + template + inline std::vector permute(const std::vector &values) const { + return permute(values.size(), values.data()); + } + + template + inline std::vector permute(uint64_t size, const T *values) const { + std::vector out(permSize); + permute(size, values, out.data()); + return out; + } + + template + inline void permute(uint64_t size, const T *values, T *out) const { + assert(size == permSize && "size mismatch"); + for (uint64_t i = 0; i < permSize; ++i) + out[i] = values[perm[i]]; + return out; + } + +private: + const uint64_t permSize; + const uint64_t *const perm; // non-owning pointer. +}; + +} // namespace detail +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_PERMUTATIONREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -35,6 +35,7 @@ #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/ExecutionEngine/Float16bits.h" +#include "mlir/ExecutionEngine/SparseTensor/Attributes.h" #include "mlir/ExecutionEngine/SparseTensor/COO.h" #include "mlir/ExecutionEngine/SparseTensor/CheckedMul.h" #include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" @@ -50,14 +51,22 @@ // These macros ensure consistent error messages, without risk of incuring // an additional method call to do so. -#define ASSERT_VALID_DIM(d) \ - assert(d < getRank() && "Dimension index is out of bounds"); -#define ASSERT_COMPRESSED_DIM(d) \ - assert(isCompressedDim(d) && "Dimension is not compressed"); -#define ASSERT_COMPRESSED_OR_SINGLETON_DIM(d) \ - assert((isCompressedDim(d) || isSingletonDim(d)) && \ - "Dimension is neither compressed nor singleton"); -#define ASSERT_DENSE_DIM(d) assert(isDenseDim(d) && "Dimension is not dense"); +#define ASSERT_VALID_LVL(l) \ + assert(l < getLvlRank() && "Level index is out of bounds"); +#define ASSERT_COMPRESSED_LVL(l) \ + assert(isCompressedLvl(l) && "Level is not compressed"); +#define ASSERT_COMPRESSED_OR_SINGLETON_LVL(l) \ + do { \ + const DimLevelType dlt = getLvlType(l); \ + (void)dlt; \ + assert((isCompressedDLT(dlt) || isSingletonDLT(dlt)) && \ + "Level is neither compressed nor singleton"); \ + } while (false) +// Because the `SparseTensorStorageBase` ctor uses `MLIR_SPARSETENSOR_FATAL` +// (rather than `assert`) when validating level-types, all the uses of +// `ASSERT_DENSE_DLT` are technically unnecessary. However, they are +// retained for the sake of future-proofing. +#define ASSERT_DENSE_DLT(dlt) assert(isDenseDLT(dlt) && "Level is not dense"); /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects @@ -65,6 +74,41 @@ /// we use function overloading to implement "partial" method /// specialization, which the C-API relies on to catch type errors /// arising from our use of opaque pointers. +/// +/// Because this class forms a bridge between the denotational semantics +/// of "tensors" and the operational semantics of how we store and +/// compute with them, it also distinguishes between two different +/// coordinate spaces (and their associated rank, shape, sizes, etc). +/// Denotationally, we have the *dimensions* of the tensor represented +/// by this object. Operationally, we have the *levels* of the storage +/// representation itself. We use this "dimension" vs "level" terminology +/// throughout, since alternative terminology like "tensor-dimension", +/// "original-dimension", "storage-dimension", etc, is both more verbose +/// and prone to introduce confusion whenever the qualifiers are dropped. +/// Where necessary, we use "axis" as the generic term. +/// +/// The *size* of an axis is the cardinality of possible coordinate/index +/// values along that axis (regardless of which coordinates have stored +/// element values). As such, each size must be non-zero since if any +/// axis has size-zero then the whole tensor would have trivial storage +/// (since there are no possible coordinates). Thus we use the plural +/// term *sizes* for a collection of non-zero cardinalities, and use +/// this term whenever referring to run-time cardinalities. Whereas we +/// use the term *shape* for a collection of compile-time cardinalities, +/// where zero is used to indicate cardinalities which are dynamic (i.e., +/// unknown/unspecified at compile-time). At run-time, these dynamic +/// cardinalities will be inferred from or checked against sizes otherwise +/// specified. Thus, dynamic cardinalities always have an "immutable but +/// unknown" value; so the term "dynamic" should not be taken to indicate +/// run-time mutability. +// +// TODO: we'd like to factor out a class akin to `PermutationRef` for +// capturing known-valid sizes to avoid redundant validity assertions. +// But calling that class "SizesRef" would be a terrible name (and +// "ValidSizesRef" isn't much better). Whereas, calling it "ShapeRef" +// would be a lot nicer, but then that conflicts with the terminology +// introduced above. So we need to come up with some new terminology +// for distinguishing things, which allows a reasonable class name too. class SparseTensorStorageBase { protected: // Since this class is virtual, we must disallow public copying in @@ -72,69 +116,92 @@ // that means making copying protected. // SparseTensorStorageBase(const SparseTensorStorageBase &) = default; - // Copy-assignment would be implicitly deleted (because `dimSizes` - // is const), so we explicitly delete it for clarity. + // Copy-assignment would be implicitly deleted (because our fields + // are const), so we explicitly delete it for clarity. SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete; public: - /// Constructs a new storage object. The `perm` maps the tensor's - /// semantic-ordering of dimensions to this object's storage-order. - /// The `dimSizes` and `sparsity` arrays are already in storage-order. + /// Constructs a new sparse-tensor storage object with the given encoding. /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorageBase(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity); + /// Preconditions: + /// * `dimSizes`, `lvlSizes`, `lvlTypes`, and `lvl2dim` must be nonnull. + /// * `dimSizes` must be valid for `dimRank`. + /// * `lvlSizes`, `lvlTypes`, and `lvl2dim` must be valid for `lvlRank`. + /// * `lvl2dim` must map indices valid for `lvlSizes` to indices valid + /// for `dimSizes`. + /// + /// Asserts: + /// * `dimRank` and `lvlRank` are nonzero. + /// * `dimSizes` and `lvlSizes` contain only nonzero sizes. + SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, + const uint64_t *lvl2dim); + // NOTE: For the most part we only need the `dimRank`. But we need + // `dimSizes` for `toCOO` to support the identity permutation nicely + // (i.e., without the caller needing to already know the tensor's + // dimension-sizes; e.g., as in `fromMLIRSparseTensor`). virtual ~SparseTensorStorageBase() = default; - /// Gets the rank of the tensor. - uint64_t getRank() const { return dimSizes.size(); } + /// Gets the number of tensor-dimensions. + uint64_t getDimRank() const { return dimSizes.size(); } - /// Gets the dimension-sizes array, in storage-order. + /// Gets the number of storage-levels. + uint64_t getLvlRank() const { return lvlSizes.size(); } + + /// Gets the tensor-dimension sizes array. const std::vector &getDimSizes() const { return dimSizes; } - /// Safely looks up the size of the given (storage-order) dimension. - uint64_t getDimSize(uint64_t d) const { - ASSERT_VALID_DIM(d); - return dimSizes[d]; + /// Gets the storage-level sizes array. + const std::vector &getLvlSizes() const { return lvlSizes; } + + /// Safely looks up the size of the given storage-level. + uint64_t getLvlSize(uint64_t l) const { + ASSERT_VALID_LVL(l); + return lvlSizes[l]; } - /// Gets the "reverse" permutation, which maps this object's - /// storage-order to the tensor's semantic-order. - const std::vector &getRev() const { return rev; } + /// Gets the level-to-dimension mapping. + const std::vector &getLvl2Dim() const { return lvl2dim; } - /// Gets the dimension-types array, in storage-order. - const std::vector &getDimTypes() const { return dimTypes; } + /// Gets the level-types array. + const std::vector &getLvlTypes() const { return lvlTypes; } - /// Safely looks up the level-type of the given (storage-order) dimension. - DimLevelType getDimType(uint64_t d) const { - ASSERT_VALID_DIM(d); - return dimTypes[d]; + /// Safely looks up the type of the given level. + DimLevelType getLvlType(uint64_t l) const { + ASSERT_VALID_LVL(l); + return lvlTypes[l]; } - /// Safely checks if the (storage-order) dimension uses dense storage. - bool isDenseDim(uint64_t d) const { return isDenseDLT(getDimType(d)); } + /// Safely checks if the level uses dense storage. + bool isDenseLvl(uint64_t l) const { return isDenseDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension uses compressed storage. - bool isCompressedDim(uint64_t d) const { - return isCompressedDLT(getDimType(d)); + /// Safely checks if the level uses compressed storage. + bool isCompressedLvl(uint64_t l) const { + return isCompressedDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension uses singleton storage. - bool isSingletonDim(uint64_t d) const { - return isSingletonDLT(getDimType(d)); + /// Safely checks if the level uses singleton storage. + bool isSingletonLvl(uint64_t l) const { + return isSingletonDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension is ordered. - bool isOrderedDim(uint64_t d) const { return isOrderedDLT(getDimType(d)); } + /// Safely checks if the level is ordered. + bool isOrderedLvl(uint64_t l) const { return isOrderedDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension is unique. - bool isUniqueDim(uint64_t d) const { return isUniqueDLT(getDimType(d)); } + /// Safely checks if the level is unique. + bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } - /// Allocates a new enumerator. + /// Allocates a new enumerator. Callers must make sure to delete + /// the enumerator when they're done with it. The first argument + /// is the out-parameter for storing the newly allocated enumerator; + /// all other arguments are passed along to the `SparseTensorEnumerator` + /// ctor and must satisfy the preconditions/assertions thereof. #define DECL_NEWENUMERATOR(VNAME, V) \ virtual void newEnumerator(SparseTensorEnumeratorBase **, uint64_t, \ - const uint64_t *) const; + const uint64_t *, uint64_t, const uint64_t *) \ + const; MLIR_SPARSETENSOR_FOREVERY_V(DECL_NEWENUMERATOR) #undef DECL_NEWENUMERATOR @@ -149,19 +216,33 @@ virtual void getIndices(std::vector **, uint64_t); MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETINDICES) #undef DECL_GETINDICES - virtual uint64_t getIndex(uint64_t d, uint64_t pos) const = 0; + virtual uint64_t getIndex(uint64_t l, uint64_t pos) const = 0; /// Gets primary storage. #define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector **); MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES) #undef DECL_GETVALUES - /// Element-wise insertion in lexicographic index order. + /// Element-wise insertion in lexicographic index order. The first + /// argument is the level-indices for the value being inserted. + // TODO: For better safety, this should take a parameter for the + // length of `lvlInd` and check that against `getLvlRank()`. #define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V); MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT) #undef DECL_LEXINSERT - /// Expanded insertion. + /// Expanded insertion. Note that this method resets the + /// values/filled-switch array back to all-zero/false while only + /// iterating over the nonzero elements. + /// + /// Arguments: + /// * `lvlInd` the level-indices shared by the values being inserted. + /// * `values` a map from last-level coordinates to their associated value. + /// * `filled` a map from last-level coordinates to bool, indicating + /// whether `values` contains a valid value to be inserted. + /// * `added` a map from `[0..count)` to last-level coordinates for + /// which `filled` is true and `values` contains the assotiated value. + /// * `count` the size of `added`. #define DECL_EXPINSERT(VNAME, V) \ virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t); MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT) @@ -172,8 +253,9 @@ private: const std::vector dimSizes; - std::vector rev; // conceptually `const` - const std::vector dimTypes; + const std::vector lvlSizes; + const std::vector lvlTypes; + const std::vector lvl2dim; }; //===----------------------------------------------------------------------===// @@ -193,107 +275,179 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Private constructor to share code between the other constructors. /// Beware that the object is not necessarily guaranteed to be in a - /// valid state after this constructor alone; e.g., `isCompressedDim(d)` - /// doesn't entail `!(pointers[d].empty())`. + /// valid state after this constructor alone; e.g., `isCompressedLvl(l)` + /// doesn't entail `!(pointers[l].empty())`. /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity) - : SparseTensorStorageBase(dimSizes, perm, sparsity), pointers(getRank()), - indices(getRank()), idx(getRank()) {} + /// Preconditions/assertions are as per the `SparseTensorStorageBase` ctor. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim) + : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, + lvl2dim), + pointers(lvlRank), indices(lvlRank), lvlCursor(lvlRank) {} public: - /// Constructs a sparse tensor storage scheme with the given dimensions, - /// permutation, and per-dimension dense/sparse annotations, using - /// the coordinate scheme tensor for the initial contents if provided. + /// Constructs a sparse tensor with the given encoding, and allocates + /// overhead storage according to some simple heuristics. When the + /// `bool` argument is true and `lvlTypes` are all dense, then this + /// ctor will also initialize the values array with zeros. That + /// argument should be true when an empty tensor is intended; whereas + /// it should usually be false when the ctor will be followed up by + /// some other form of initialization. /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity, - SparseTensorCOO *coo); + /// Preconditions/assertions are as per the `SparseTensorStorageBase` ctor. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + bool initializeValuesIfAllDense); - /// Constructs a sparse tensor storage scheme with the given dimensions, - /// permutation, and per-dimension dense/sparse annotations, using - /// the given sparse tensor for the initial contents. + /// Constructs a sparse tensor with the given encoding, and initializes + /// the contents from the COO. This ctor performs the same heuristic + /// overhead-storage allocation as the ctor taking a `bool`, and + /// has the same preconditions/assertions (where we define `lvlSizes = + /// lvlCOO.getDimSizes().data()`), with the following addition: /// - /// Preconditions: - /// * `perm` and `sparsity` must be valid for `dimSizes.size()`. - /// * The `tensor` must have the same value type `V`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity, - const SparseTensorStorageBase &tensor); + /// Asserts: + /// * `lvlRank == lvlCOO.getRank()`. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO); + + /// Constructs a sparse tensor with the given encoding, and initializes + /// the contents from the enumerator. This ctor allocates exactly + /// the required amount of overhead storage, not using any heuristics. + /// Preconditions/assertions are as per the `SparseTensorStorageBase` + /// ctor (where we define `lvlSizes = lvlEnumerator.getTrgSizes().data()`), + /// with the following addition: + /// + /// Asserts: + /// * `lvlRank == lvlEnumerator.getTrgRank()`. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, + SparseTensorEnumeratorBase &lvlEnumerator); - /// Factory method. Constructs a sparse tensor storage scheme with the given - /// dimensions, permutation, and per-dimension dense/sparse annotations, - /// using the coordinate scheme tensor for the initial contents if provided. - /// In the latter case, the coordinate scheme must respect the same - /// permutation as is desired for the new sparse tensor storage. + /// Allocates a new empty sparse tensor. The preconditions/assertions + /// are as per the `SparseTensorStorageBase` ctor; which is to say, + /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", + /// since there's nowhere to reconstruct dynamic sizes from. + static SparseTensorStorage * + newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim) { + return new SparseTensorStorage(dimRank, dimSizes, lvlRank, + lvlSizes, lvlTypes, lvl2dim, true); + } + + /// Allocates a new sparse tensor and initializes it from the given COO. + /// The preconditions are as per the `SparseTensorStorageBase` ctor + /// (where we define `lvlSizes = lvlCOO.getDimSizes().data()`), but + /// using the following assertions in lieu of the base ctor's assertions: /// - /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`. + /// Asserts: + /// * `dimRank` and `lvlRank` are nonzero. + /// * `lvlRank == lvlCOO.getRank()`. + /// * `lvlCOO.getDimSizes()` under the `lvl2dim` mapping is a refinement + /// of `dimShape`. + // + // TODO: The ability to reconstruct dynamic dimensions-sizes does not + // easily generalize to arbitrary `lvl2dim` mappings. When compiling + // MLIR programs to use this library, we should be able to generate + // code for effectively computing the reconstruction, but it's not clear + // that there's a feasible way to do so from within the library itself. + // Therefore, when we functionalize the `lvl2dim` mapping we'll have + // to update the type/preconditions of this factory too. static SparseTensorStorage * - newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo); + newFromCOO(uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO); - /// Factory method. Constructs a sparse tensor storage scheme with - /// the given dimensions, permutation, and per-dimension dense/sparse - /// annotations, using the sparse tensor for the initial contents. + /// Allocates a new sparse tensor and initializes it with the contents + /// of another sparse tensor. /// /// Preconditions: - /// * `shape`, `perm`, and `sparsity` must be valid for `rank`. - /// * The `tensor` must have the same value type `V`. + /// * as per the `SparseTensorStorageBase` ctor. + /// * `src2lvl` must be valid for `srcRank`, must map indices valid for + /// `source.getDimSizes()` to indices valid for `lvlSizes`, and therefore + /// must be the inverse of `lvl2dim`. + /// * `source` must have the same value type `V`. + /// + /// Asserts: + /// * `dimRank` and `lvlRank` are nonzero. + /// * `srcRank == source.getDimRank()`. + /// * `lvlSizes` contains only nonzero sizes. + /// * `source.getDimSizes()` is a refinement of `dimShape`. + // + // TODO: The `dimRank` and `dimShape` arguments are only used for + // verifying that the source tensor has the expected shape. So if we + // wanted to skip that verification, then we could remove those arguments. + // Alternatively, if we required the `dimShape` to be "sizes" instead, + // then that would remove any constraints on `source.getDimSizes()` + // (other than compatibility with `src2lvl`) as well as removing the + // requirement that `src2lvl` be the inverse of `lvl2dim`. Which would + // enable this factory to be used for performing a much larger class of + // transformations (which can already be handled by the `SparseTensorNNZ` + // implementation). static SparseTensorStorage * - newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, - const SparseTensorStorageBase *source); + newFromSparseTensor(uint64_t dimRank, const uint64_t *dimShape, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + uint64_t srcRank, const uint64_t *src2lvl, + const SparseTensorStorageBase &source); ~SparseTensorStorage() final = default; /// Partially specialize these getter methods based on template types. - void getPointers(std::vector

**out, uint64_t d) final { - ASSERT_VALID_DIM(d); - *out = &pointers[d]; + void getPointers(std::vector

**out, uint64_t l) final { + assert(out && "Received nullptr for out parameter"); + ASSERT_VALID_LVL(l); + *out = &pointers[l]; } - void getIndices(std::vector **out, uint64_t d) final { - ASSERT_VALID_DIM(d); - *out = &indices[d]; + void getIndices(std::vector **out, uint64_t l) final { + assert(out && "Received nullptr for out parameter"); + ASSERT_VALID_LVL(l); + *out = &indices[l]; + } + void getValues(std::vector **out) final { + assert(out && "Received nullptr for out parameter"); + *out = &values; } - void getValues(std::vector **out) final { *out = &values; } - uint64_t getIndex(uint64_t d, uint64_t pos) const final { - ASSERT_COMPRESSED_OR_SINGLETON_DIM(d); - assert(pos < indices[d].size() && "Index position is out of bounds"); - return indices[d][pos]; // Converts the stored `I` into `uint64_t`. + uint64_t getIndex(uint64_t l, uint64_t pos) const final { + ASSERT_COMPRESSED_OR_SINGLETON_LVL(l); + assert(pos < indices[l].size() && "Index position is out of bounds"); + return indices[l][pos]; // Converts the stored `I` into `uint64_t`. } /// Partially specialize lexicographical insertions based on template types. - void lexInsert(const uint64_t *cursor, V val) final { + void lexInsert(const uint64_t *lvlInd, V val) final { + assert(lvlInd && "Received nullptr for level-indices"); // First, wrap up pending insertion path. - uint64_t diff = 0; - uint64_t top = 0; + uint64_t diffLvl = 0; + uint64_t topIdx = 0; if (!values.empty()) { - diff = lexDiff(cursor); - endPath(diff + 1); - top = idx[diff] + 1; + diffLvl = lexDiff(lvlInd); + endPath(diffLvl + 1); + topIdx = lvlCursor[diffLvl] + 1; } // Then continue with insertion path. - insPath(cursor, diff, top, val); + insPath(lvlInd, diffLvl, topIdx, val); } /// Partially specialize expanded insertions based on template types. - /// Note that this method resets the values/filled-switch array back - /// to all-zero/false while only iterating over the nonzero elements. - void expInsert(uint64_t *cursor, V *values, bool *filled, uint64_t *added, + void expInsert(uint64_t *lvlInd, V *values, bool *filled, uint64_t *added, uint64_t count) final { + assert((lvlInd && values && filled && added) && "Received nullptr"); if (count == 0) return; // Sort. std::sort(added, added + count); // Restore insertion path for first insert. - const uint64_t lastDim = getRank() - 1; + const uint64_t lastLvl = getLvlRank() - 1; uint64_t index = added[0]; assert(filled[index] && "added index is not filled"); - cursor[lastDim] = index; - lexInsert(cursor, values[index]); + lvlInd[lastLvl] = index; + lexInsert(lvlInd, values[index]); values[index] = 0; filled[index] = false; // Subsequent insertions are quick. @@ -301,8 +455,8 @@ assert(index < added[i] && "non-lexicographic insertion"); index = added[i]; assert(filled[index] && "added index is not filled"); - cursor[lastDim] = index; - insPath(cursor, lastDim, added[i - 1] + 1, values[index]); + lvlInd[lastLvl] = index; + insPath(lvlInd, lastLvl, added[i - 1] + 1, values[index]); values[index] = 0; filled[index] = false; } @@ -316,103 +470,110 @@ endPath(0); } - /// Allocates a new enumerator for this classes `` types and + /// Allocates a new enumerator for this class's `` types and /// erase the `` parts from the type. Callers must make sure to /// delete the enumerator when they're done with it. - void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t rank, - const uint64_t *perm) const final { - *out = new SparseTensorEnumerator(*this, rank, perm); + void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t trgRank, + const uint64_t *trgSizes, uint64_t srcRank, + const uint64_t *src2trg) const final { + assert(out && "Received nullptr for out parameter"); + *out = new SparseTensorEnumerator(*this, trgRank, trgSizes, + srcRank, src2trg); } - /// Returns this sparse tensor storage scheme as a new memory-resident - /// sparse tensor in coordinate scheme with the given dimension order. + /// Allocates a new COO object and initializes it with the contents + /// of this tensor under the given mapping from the `getDimSizes()` + /// coordinate-space to the `trgSizes` coordinate-space. Callers must + /// make sure to delete the COO when they're done with it. /// - /// Precondition: `perm` must be valid for `getRank()`. - SparseTensorCOO *toCOO(const uint64_t *perm) const { - SparseTensorEnumeratorBase *enumerator; - newEnumerator(&enumerator, getRank(), perm); - SparseTensorCOO *coo = - new SparseTensorCOO(enumerator->permutedSizes(), values.size()); - enumerator->forallElements([&coo](const std::vector &ind, V val) { - coo->add(ind, val); - }); + /// Preconditions/assertions are as per the `SparseTensorEnumerator` ctor. + SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, + uint64_t srcRank, const uint64_t *src2trg) const { + // We inline `newEnumerator` to avoid virtual dispatch and allocation. + SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, + srcRank, src2trg); + auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); + enumerator.forallElements( + [&coo](const auto &trgInd, V val) { coo->add(trgInd, val); }); // TODO: This assertion assumes there are no stored zeros, // or if there are then that we don't filter them out. // Cf., assert(coo->getElements().size() == values.size()); - delete enumerator; return coo; } private: - /// Appends an arbitrary new position to `pointers[d]`. This method + /// Appends an arbitrary new position to `pointers[l]`. This method /// checks that `pos` is representable in the `P` type; however, it /// does not check that `pos` is semantically valid (i.e., larger than - /// the previous position and smaller than `indices[d].capacity()`). - void appendPointer(uint64_t d, uint64_t pos, uint64_t count = 1) { - ASSERT_COMPRESSED_DIM(d); + /// the previous position and smaller than `indices[l].capacity()`). + void appendPointer(uint64_t l, uint64_t pos, uint64_t count = 1) { + ASSERT_COMPRESSED_LVL(l); assert(pos <= std::numeric_limits

::max() && "Pointer value is too large for the P-type"); - pointers[d].insert(pointers[d].end(), count, static_cast

(pos)); + pointers[l].insert(pointers[l].end(), count, static_cast

(pos)); } - /// Appends index `i` to dimension `d`, in the semantically general - /// sense. For non-dense dimensions, that means appending to the - /// `indices[d]` array, checking that `i` is representable in the `I` - /// type; however, we do not verify other semantic requirements (e.g., - /// that `i` is in bounds for `dimSizes[d]`, and not previously occurring - /// in the same segment). For dense dimensions, this method instead - /// appends the appropriate number of zeros to the `values` array, - /// where `full` is the number of "entries" already written to `values` - /// for this segment (aka one after the highest index previously appended). - void appendIndex(uint64_t d, uint64_t full, uint64_t i) { - if (isCompressedDim(d) || isSingletonDim(d)) { + /// Appends index `i` to level `l`, in the semantically general sense. + /// For non-dense levels, that means appending to the `indices[l]` array, + /// checking that `i` is representable in the `I` type; however, we do + /// not verify other semantic requirements (e.g., that `i` is in bounds + /// for `lvlSizes[l]`, and not previously occurring in the same segment). + /// For dense levels, this method instead appends the appropriate number + /// of zeros to the `values` array, where `full` is the number of "entries" + /// already written to `values` for this segment (aka one after the highest + /// index previously appended). + void appendIndex(uint64_t l, uint64_t full, uint64_t i) { + const auto dlt = getLvlType(l); // Avoid redundant bounds checking. + if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) { assert(i <= std::numeric_limits::max() && "Index value is too large for the I-type"); - indices[d].push_back(static_cast(i)); + indices[l].push_back(static_cast(i)); } else { // Dense dimension. - ASSERT_DENSE_DIM(d); + ASSERT_DENSE_DLT(dlt); assert(i >= full && "Index was already filled"); if (i == full) return; // Short-circuit, since it'll be a nop. - if (d + 1 == getRank()) + if (l + 1 == getLvlRank()) values.insert(values.end(), i - full, 0); else - finalizeSegment(d + 1, 0, i - full); + finalizeSegment(l + 1, 0, i - full); } } - /// Writes the given coordinate to `indices[d][pos]`. This method + /// Writes the given coordinate to `indices[l][pos]`. This method /// checks that `i` is representable in the `I` type; however, it /// does not check that `i` is semantically valid (i.e., in bounds - /// for `dimSizes[d]` and not elsewhere occurring in the same segment). - void writeIndex(uint64_t d, uint64_t pos, uint64_t i) { - ASSERT_COMPRESSED_OR_SINGLETON_DIM(d); + /// for `dimSizes[l]` and not elsewhere occurring in the same segment). + void writeIndex(uint64_t l, uint64_t pos, uint64_t i) { + ASSERT_COMPRESSED_OR_SINGLETON_LVL(l); // Subscript assignment to `std::vector` requires that the `pos`-th // entry has been initialized; thus we must be sure to check `size()` // here, instead of `capacity()` as would be ideal. - assert(pos < indices[d].size() && "Index position is out of bounds"); + assert(pos < indices[l].size() && "Index position is out of bounds"); assert(i <= std::numeric_limits::max() && "Index value is too large for the I-type"); - indices[d][pos] = static_cast(i); + indices[l][pos] = static_cast(i); } - /// Computes the assembled-size associated with the `d`-th dimension, - /// given the assembled-size associated with the `(d-1)`-th dimension. + /// Computes the assembled-size associated with the `l`-th level, + /// given the assembled-size associated with the `(l-1)`-th level. /// "Assembled-sizes" correspond to the (nominal) sizes of overhead - /// storage, as opposed to "dimension-sizes" which are the cardinality - /// of coordinates for that dimension. + /// storage, as opposed to "level-sizes" which are the cardinality + /// of possible coordinates for that level. /// - /// Precondition: the `pointers[d]` array must be fully initialized + /// Precondition: the `pointers[l]` array must be fully initialized /// before calling this method. - uint64_t assembledSize(uint64_t parentSz, uint64_t d) const { - if (isCompressedDim(d)) - return pointers[d][parentSz]; - if (isSingletonDim(d)) + uint64_t assembledSize(uint64_t parentSz, uint64_t l) const { + const auto dlt = getLvlType(l); // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) + return pointers[l][parentSz]; + if (isSingletonDLT(dlt)) return parentSz; // New size is same as the parent. - if (isDenseDim(d)) - return parentSz * getDimSizes()[d]; - MLIR_SPARSETENSOR_FATAL("unsupported dimension level type"); + if (isDenseDLT(dlt)) + return parentSz * getLvlSizes()[l]; + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", + static_cast(dlt)); } /// Initializes sparse tensor storage scheme from a memory-resident sparse @@ -420,95 +581,101 @@ /// indices arrays under the given per-dimension dense/sparse annotations. /// /// Preconditions: - /// (1) the `elements` must be lexicographically sorted. - /// (2) the indices of every element are valid for `dimSizes` (equal rank - /// and pointwise less-than). - void fromCOO(const std::vector> &elements, uint64_t lo, - uint64_t hi, uint64_t d) { - const uint64_t rank = getRank(); - assert(d <= rank && hi <= elements.size()); + /// * the `lvlElements` must be lexicographically sorted. + /// * the indices of every element are valid for `getLvlSizes()` + /// (i.e., equal rank and pointwise less-than). + void fromCOO(const std::vector> &lvlElements, uint64_t lo, + uint64_t hi, uint64_t l) { + const uint64_t lvlRank = getLvlRank(); + assert(l <= lvlRank && hi <= lvlElements.size()); // Once dimensions are exhausted, insert the numerical values. - if (d == rank) { + if (l == lvlRank) { assert(lo < hi); - values.push_back(elements[lo].value); + values.push_back(lvlElements[lo].value); return; } // Visit all elements in this interval. uint64_t full = 0; - while (lo < hi) { // If `hi` is unchanged, then `lo < elements.size()`. - // Find segment in interval with same index elements in this dimension. - const uint64_t i = elements[lo].indices[d]; + while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`. + // Find segment in interval with same index elements in this level. + const uint64_t i = lvlElements[lo].indices[l]; uint64_t seg = lo + 1; - if (isUniqueDim(d)) - while (seg < hi && elements[seg].indices[d] == i) + if (isUniqueLvl(l)) + while (seg < hi && lvlElements[seg].indices[l] == i) ++seg; - // Handle segment in interval for sparse or dense dimension. - appendIndex(d, full, i); + // Handle segment in interval for sparse or dense level. + appendIndex(l, full, i); full = i + 1; - fromCOO(elements, lo, seg, d + 1); + fromCOO(lvlElements, lo, seg, l + 1); // And move on to next segment in interval. lo = seg; } - // Finalize the sparse pointer structure at this dimension. - finalizeSegment(d, full); + // Finalize the sparse pointer structure at this level. + finalizeSegment(l, full); } - /// Finalizes the sparse pointer structure at this dimension. - void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) { + /// Finalizes the sparse pointer structure at this level. + void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) { if (count == 0) return; // Short-circuit, since it'll be a nop. - if (isCompressedDim(d)) { - appendPointer(d, indices[d].size(), count); - } else if (isSingletonDim(d)) { + const auto dlt = getLvlType(l); // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + appendPointer(l, indices[l].size(), count); + } else if (isSingletonDLT(dlt)) { return; // Nothing to finalize. - } else { // Dense dimension. - ASSERT_DENSE_DIM(d); - const uint64_t sz = getDimSizes()[d]; + } else { // Dense dimension. + ASSERT_DENSE_DLT(dlt); + const uint64_t sz = getLvlSizes()[l]; assert(sz >= full && "Segment is overfull"); count = detail::checkedMul(count, sz - full); // For dense storage we must enumerate all the remaining coordinates - // in this dimension (i.e., coordinates after the last non-zero + // in this level (i.e., coordinates after the last non-zero // element), and either fill in their zero values or else recurse - // to finalize some deeper dimension. - if (d + 1 == getRank()) + // to finalize some deeper level. + if (l + 1 == getLvlRank()) values.insert(values.end(), count, 0); else - finalizeSegment(d + 1, 0, count); + finalizeSegment(l + 1, 0, count); } } /// Wraps up a single insertion path, inner to outer. - void endPath(uint64_t diff) { - const uint64_t rank = getRank(); - assert(diff <= rank && "Dimension-diff is out of bounds"); - for (uint64_t i = 0; i < rank - diff; ++i) { - const uint64_t d = rank - i - 1; - finalizeSegment(d, idx[d] + 1); + void endPath(uint64_t diffLvl) { + const uint64_t lvlRank = getLvlRank(); + const uint64_t lastLvl = lvlRank - 1; + assert(diffLvl <= lvlRank && "Level-diff is out of bounds"); + const uint64_t stop = lvlRank - diffLvl; + for (uint64_t i = 0; i < stop; ++i) { + const uint64_t l = lastLvl - i; + finalizeSegment(l, lvlCursor[l] + 1); } } - /// Continues a single insertion path, outer to inner. - void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) { - const uint64_t rank = getRank(); - assert(diff <= rank && "Dimension-diff is out of bounds"); - for (uint64_t d = diff; d < rank; ++d) { - const uint64_t i = cursor[d]; - appendIndex(d, top, i); - top = 0; - idx[d] = i; + /// Continues a single insertion path, outer to inner. The first + /// argument is the storage-level indices for the value being inserted. + void insPath(const uint64_t *lvlInd, uint64_t diffLvl, uint64_t topIdx, + V val) { + const uint64_t lvlRank = getLvlRank(); + assert(diffLvl <= lvlRank && "Level-diff is out of bounds"); + for (uint64_t l = diffLvl; l < lvlRank; ++l) { + const uint64_t i = lvlInd[l]; + appendIndex(l, topIdx, i); + topIdx = 0; + lvlCursor[l] = i; } values.push_back(val); } - /// Finds the lexicographic differing dimension. - uint64_t lexDiff(const uint64_t *cursor) const { - const uint64_t rank = getRank(); - for (uint64_t r = 0; r < rank; ++r) - if (cursor[r] > idx[r]) - return r; + /// Finds the lexicographically first level where the level-indices + /// in the argument differ from those in the current cursor. + uint64_t lexDiff(const uint64_t *lvlInd) const { + const uint64_t lvlRank = getLvlRank(); + for (uint64_t l = 0; l < lvlRank; ++l) + if (lvlInd[l] > lvlCursor[l]) + return l; else - assert(cursor[r] == idx[r] && "non-lexicographic insertion"); - assert(0 && "duplication insertion"); + assert(lvlInd[l] == lvlCursor[l] && "non-lexicographic insertion"); + assert(0 && "duplicate insertion"); return -1u; } @@ -520,12 +687,12 @@ std::vector> pointers; std::vector> indices; std::vector values; - std::vector idx; // index cursor for lexicographic insertion. + std::vector lvlCursor; // cursor for lexicographic insertion. }; -#undef ASSERT_COMPRESSED_OR_SINGLETON_DIM -#undef ASSERT_COMPRESSED_DIM -#undef ASSERT_VALID_DIM +#undef ASSERT_COMPRESSED_OR_SINGLETON_LVL +#undef ASSERT_COMPRESSED_LVL +#undef ASSERT_VALID_LVL //===----------------------------------------------------------------------===// /// A (higher-order) function object for enumerating the elements of some @@ -549,27 +716,35 @@ /// than simply using this class, is to avoid the cost of virtual-method /// dispatch within the loop-nest. template -class SparseTensorEnumeratorBase { +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] SparseTensorEnumeratorBase { public: - /// Constructs an enumerator with the given permutation for mapping - /// the semantic-ordering of dimensions to the desired target-ordering. + /// Constructs an enumerator which automatically applies the given + /// mapping from the source tensor's dimensions to the desired + /// target tensor dimensions. /// /// Preconditions: - /// * the `tensor` must have the same `V` value type. - /// * `perm` must be valid for `rank`. - SparseTensorEnumeratorBase(const SparseTensorStorageBase &tensor, - uint64_t rank, const uint64_t *perm) - : src(tensor), permsz(src.getRev().size()), reord(getRank()), - cursor(getRank()) { - assert(perm && "Received nullptr for permutation"); - assert(rank == getRank() && "Permutation rank mismatch"); - const auto &rev = src.getRev(); // source-order -> semantic-order - const auto &dimSizes = src.getDimSizes(); // in source storage-order - for (uint64_t s = 0; s < rank; ++s) { // `s` source storage-order - uint64_t t = perm[rev[s]]; // `t` target-order - reord[s] = t; - permsz[t] = dimSizes[s]; - } + /// * the `src` must have the same `V` value type. + /// * `trgSizes` must be valid for `trgRank`. + /// * `src2trg` must be valid for `srcRank`, and must map indices + /// valid for `src.getDimSizes()` to indices valid for `trgSizes`. + /// + /// Asserts: + /// * `trgSizes` must be nonnull and must contain only nonzero sizes. + /// * `srcRank == src.getDimRank()`. + /// * `src2trg` must be nonnull. + SparseTensorEnumeratorBase(const SparseTensorStorageBase &src, + uint64_t trgRank, const uint64_t *trgSizes, + uint64_t srcRank, const uint64_t *src2trg) + : src(src), trgSizes(trgSizes, trgSizes + trgRank), + lvl2trg(src.getLvlRank()), trgCursor(trgRank) { + assert(trgSizes && "Received nullptr for target-sizes"); + assert(src2trg && "Received nullptr for source-to-target mapping"); + assert(srcRank == src.getDimRank() && "Source-rank mismatch"); + for (uint64_t t = 0; t < trgRank; ++t) + assert(trgSizes[t] > 0 && "Target-size zero has trivial storage"); + const auto &lvl2src = src.getLvl2Dim(); + for (uint64_t lvlRank = src.getLvlRank(), l = 0; l < lvlRank; ++l) + lvl2trg[l] = src2trg[lvl2src[l]]; } virtual ~SparseTensorEnumeratorBase() = default; @@ -580,14 +755,18 @@ SparseTensorEnumeratorBase & operator=(const SparseTensorEnumeratorBase &) = delete; - /// Returns the source/target tensor's rank. (The source-rank and - /// target-rank are always equal since we only support permutations. - /// Though once we add support for other dimension mappings, this - /// method will have to be split in two.) - uint64_t getRank() const { return permsz.size(); } + /// Gets the source's dimension-rank. + uint64_t getSrcDimRank() const { return src.getDimRank(); } - /// Returns the target tensor's dimension sizes. - const std::vector &permutedSizes() const { return permsz; } + /// Gets the target's dimension-/level-rank. (This is usually + /// "dimension-rank", though that may coincide with "level-rank" + /// depending on usage.) + uint64_t getTrgRank() const { return trgSizes.size(); } + + /// Gets the target's dimension-/level-sizes. (These are usually + /// "dimensions", though that may coincide with "level-rank" depending + /// on usage.) + const std::vector &getTrgSizes() const { return trgSizes; } /// Enumerates all elements of the source tensor, permutes their /// indices, and passes the permuted element to the callback. @@ -598,25 +777,28 @@ protected: const SparseTensorStorageBase &src; - std::vector permsz; // in target order. - std::vector reord; // source storage-order -> target order. - std::vector cursor; // in target order. + std::vector trgSizes; // in target order. + std::vector lvl2trg; // source-levels -> target-dims/lvls. + std::vector trgCursor; // in target order. }; //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; public: - /// Constructs an enumerator with the given permutation for mapping - /// the semantic-ordering of dimensions to the desired target-ordering. + /// Constructs an enumerator which automatically applies the given + /// mapping from the source tensor's dimensions to the desired + /// target tensor dimensions. /// - /// Precondition: `perm` must be valid for `rank`. - SparseTensorEnumerator(const StorageImpl &tensor, uint64_t rank, - const uint64_t *perm) - : Base(tensor, rank, perm) {} + /// Preconditions/assertions are as per the `SparseTensorEnumeratorBase` ctor. + SparseTensorEnumerator(const StorageImpl &src, uint64_t trgRank, + const uint64_t *trgSizes, uint64_t srcRank, + const uint64_t *src2trg) + : Base(src, trgRank, trgSizes, srcRank, src2trg) {} ~SparseTensorEnumerator() final = default; @@ -625,46 +807,55 @@ } private: + // TODO: Once we functionalize the mappings, then we'll no longer + // be able to use the current approach of constructing `lvl2trg` in the + // ctor and using it to incrementally fill the `trgCursor` cursor as we + // recurse through `forallElements`. Instead we'll want to incrementally + // fill a `lvlCursor` as we recurse, and then use `src.getLvl2Dim()` + // and `src2trg` to convert it just before yielding to the callback. + // It's probably most efficient to just store the `srcCursor` and + // `trgCursor` buffers in this object, but we may want to benchmark + // that against using `std::calloc` to stack-allocate them instead. + // /// The recursive component of the public `forallElements`. void forallElements(ElementConsumer yield, uint64_t parentPos, - uint64_t d) { + uint64_t l) { // Recover the `` type parameters of `src`. const auto &src = static_cast(this->src); - if (d == Base::getRank()) { + if (l == src.getLvlRank()) { assert(parentPos < src.values.size() && "Value position is out of bounds"); // TODO: - yield(this->cursor, src.values[parentPos]); + yield(this->trgCursor, src.values[parentPos]); return; } - const auto dlt = src.getDimType(d); + uint64_t &cursorL = this->trgCursor[this->lvl2trg[l]]; + const auto dlt = src.getLvlType(l); // Avoid redundant bounds checking. if (isCompressedDLT(dlt)) { - // Look up the bounds of the `d`-level segment determined by the - // `d-1`-level position `parentPos`. - const std::vector

&pointersD = src.pointers[d]; - assert(parentPos + 1 < pointersD.size() && + // Look up the bounds of the `l`-level segment determined by the + // `(l - 1)`-level position `parentPos`. + const std::vector

&pointersL = src.pointers[l]; + assert(parentPos + 1 < pointersL.size() && "Parent pointer position is out of bounds"); - const uint64_t pstart = static_cast(pointersD[parentPos]); - const uint64_t pstop = static_cast(pointersD[parentPos + 1]); - // Loop-invariant code for looking up the `d`-level coordinates/indices. - const std::vector &indicesD = src.indices[d]; - assert(pstop <= indicesD.size() && "Index position is out of bounds"); - uint64_t &cursorReordD = this->cursor[this->reord[d]]; + const uint64_t pstart = static_cast(pointersL[parentPos]); + const uint64_t pstop = static_cast(pointersL[parentPos + 1]); + // Loop-invariant code for looking up the `l`-level coordinates/indices. + const std::vector &indicesL = src.indices[l]; + assert(pstop <= indicesL.size() && "Index position is out of bounds"); for (uint64_t pos = pstart; pos < pstop; ++pos) { - cursorReordD = static_cast(indicesD[pos]); - forallElements(yield, pos, d + 1); + cursorL = static_cast(indicesL[pos]); + forallElements(yield, pos, l + 1); } } else if (isSingletonDLT(dlt)) { - this->cursor[this->reord[d]] = src.getIndex(d, parentPos); - forallElements(yield, parentPos, d + 1); - } else { - assert(isDenseDLT(dlt)); // TODO: reuse the ASSERT_DENSE_DIM message - const uint64_t sz = src.getDimSizes()[d]; + cursorL = src.getIndex(l, parentPos); + forallElements(yield, parentPos, l + 1); + } else { // Dense dimension. + ASSERT_DENSE_DLT(dlt); + const uint64_t sz = src.getLvlSizes()[l]; const uint64_t pstart = parentPos * sz; - uint64_t &cursorReordD = this->cursor[this->reord[d]]; for (uint64_t i = 0; i < sz; ++i) { - cursorReordD = i; - forallElements(yield, pstart + i, d + 1); + cursorL = i; + forallElements(yield, pstart + i, l + 1); } } } @@ -678,31 +869,39 @@ /// N.B., this class stores references to the parameters passed to /// the constructor; thus, objects of this class must not outlive /// those parameters. -class SparseTensorNNZ final { +/// +/// This class does not have the "dimension" vs "level" distinction, but +/// since it is used for initializing the levels of a `SparseTensorStorage` +/// object, we use the "level" name throughout for the sake of consistency. +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] SparseTensorNNZ final { public: - /// Allocates the statistics structure for the desired sizes and - /// sparsity (in the target tensor's storage-order). This constructor - /// does not actually populate the statistics, however; for that see - /// `initialize`. + /// Allocates the statistics structure for the desired target-tensor + /// level structure (i.e., sizes and types). This constructor does not + /// actually populate the statistics, however; for that see `initialize`. /// - /// Precondition: `dimSizes` must not contain zeros. - SparseTensorNNZ(const std::vector &dimSizes, - const std::vector &sparsity); + /// Precondition: `lvlSizes` must not contain zeros. + /// Asserts: `lvlSizes.size() == lvlTypes.size()`. + SparseTensorNNZ(const std::vector &lvlSizes, + const std::vector &lvlTypes); // We disallow copying to help avoid leaking the stored references. SparseTensorNNZ(const SparseTensorNNZ &) = delete; SparseTensorNNZ &operator=(const SparseTensorNNZ &) = delete; - /// Returns the rank of the target tensor. - uint64_t getRank() const { return dimSizes.size(); } + /// Gets the target-tensor's level-rank. + uint64_t getLvlRank() const { return lvlSizes.size(); } - /// Enumerates the source tensor to fill in the statistics. The - /// enumerator should already incorporate the permutation (from - /// semantic-order to the target storage-order). + /// Enumerates the source tensor to fill in the statistics. + /// The enumerator should already incorporate the mapping from + /// the source tensor-dimensions to the target storage-levels. + /// + /// Asserts: + /// * `enumerator.getTrgRank() == getLvlRank()`. + /// * `enumerator.getTrgSizes() == lvlSizes`. template void initialize(SparseTensorEnumeratorBase &enumerator) { - assert(enumerator.getRank() == getRank() && "Tensor rank mismatch"); - assert(enumerator.permutedSizes() == dimSizes && "Tensor size mismatch"); + assert(enumerator.getTrgRank() == getLvlRank() && "Tensor rank mismatch"); + assert(enumerator.getTrgSizes() == lvlSizes && "Tensor size mismatch"); enumerator.forallElements( [this](const std::vector &ind, V) { add(ind); }); } @@ -710,86 +909,90 @@ /// The type of callback functions which receive an nnz-statistic. using NNZConsumer = const std::function &; - /// Lexicographically enumerates all indicies for dimensions strictly - /// less than `stopDim`, and passes their nnz statistic to the callback. + /// Lexicographically enumerates all indicies for levels strictly + /// less than `stopLvl`, and passes their nnz statistic to the callback. /// Since our use-case only requires the statistic not the coordinates /// themselves, we do not bother to construct those coordinates. - void forallIndices(uint64_t stopDim, NNZConsumer yield) const; + void forallIndices(uint64_t stopLvl, NNZConsumer yield) const; private: /// Adds a new element (i.e., increment its statistics). We use /// a method rather than inlining into the lambda in `initialize`, /// to avoid spurious templating over `V`. And this method is private - /// to avoid needing to re-assert validity of `ind` (which is guaranteed - /// by `forallElements`). - void add(const std::vector &ind); + /// to avoid needing to re-assert validity of `lvlInd` (which is + /// guaranteed by `forallElements`). + void add(const std::vector &lvlInd); /// Recursive component of the public `forallIndices`. - void forallIndices(NNZConsumer yield, uint64_t stopDim, uint64_t parentPos, - uint64_t d) const; + void forallIndices(NNZConsumer yield, uint64_t stopLvl, uint64_t parentPos, + uint64_t l) const; // All of these are in the target storage-order. - const std::vector &dimSizes; - const std::vector &dimTypes; + const std::vector &lvlSizes; + const std::vector &lvlTypes; std::vector> nnz; }; //===----------------------------------------------------------------------===// // Definitions of the ctors and factories of `SparseTensorStorage`. -namespace detail { -/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping -/// semantic-order to target-order) are a refinement of the desired `shape` -/// (in semantic-order). -/// -/// Precondition: `perm` and `shape` must be valid for `rank`. -void assertPermutedSizesMatchShape(const std::vector &dimSizes, - uint64_t rank, const uint64_t *perm, - const uint64_t *shape); -} // namespace detail - template -SparseTensorStorage *SparseTensorStorage::newSparseTensor( - uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo) { - if (coo) { - const auto &coosz = coo->getDimSizes(); -#ifndef NDEBUG - detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape); -#endif - return new SparseTensorStorage(coosz, perm, sparsity, coo); +SparseTensorStorage *SparseTensorStorage::newFromCOO( + uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO) { + assert(dimShape && "Got nullptr for dimension shape"); + assert(lvl2dim && "Got nullptr for level-to-dimension mapping"); + const auto &lvlSizes = lvlCOO.getDimSizes(); + assert(lvlRank == lvlSizes.size() && "Level-rank mismatch"); + // Must reconstruct `dimSizes` from `lvlSizes`. While this is easy + // enough to do when `lvl2dim` is a permutation, this approach will + // not work for more general mappings; so we will need to move this + // computation off to codegen. + std::vector dimSizes(dimRank); + for (uint64_t l = 0; l < lvlRank; ++l) { + const uint64_t d = lvl2dim[l]; + const uint64_t sz = dimShape[d]; + assert((sz == 0 || sz == lvlSizes[l]) && + "Dimension sizes do not match expected shape"); + dimSizes[d] = lvlSizes[l]; } - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; ++r) { - assert(shape[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = shape[r]; - } - // We pass the null `coo` to ensure we select the intended constructor. - return new SparseTensorStorage(permsz, perm, sparsity, coo); + return new SparseTensorStorage(dimRank, dimSizes.data(), lvlRank, + lvlTypes, lvl2dim, lvlCOO); } template -SparseTensorStorage *SparseTensorStorage::newSparseTensor( - uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, const SparseTensorStorageBase *source) { - assert(source && "Got nullptr for source"); - SparseTensorEnumeratorBase *enumerator; - source->newEnumerator(&enumerator, rank, perm); - const auto &permsz = enumerator->permutedSizes(); +SparseTensorStorage *SparseTensorStorage::newFromSparseTensor( + uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, uint64_t srcRank, const uint64_t *src2lvl, + const SparseTensorStorageBase &source) { + // Verify that the `source` dimensions match the expected `dimShape`. + assert(dimShape && "Got nullptr for dimension shape"); + assert(dimRank == source.getDimRank() && "Dimension-rank mismatch"); + const auto &dimSizes = source.getDimSizes(); #ifndef NDEBUG - detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape); + for (uint64_t d = 0; d < dimRank; ++d) { + const uint64_t sz = dimShape[d]; + assert((sz == 0 || sz == dimSizes[d]) && + "Dimension-sizes do not match expected shape"); + } #endif - auto *tensor = - new SparseTensorStorage(permsz, perm, sparsity, *source); - delete enumerator; + SparseTensorEnumeratorBase *lvlEnumerator; + source.newEnumerator(&lvlEnumerator, lvlRank, lvlSizes, srcRank, src2lvl); + auto *tensor = new SparseTensorStorage( + dimRank, dimSizes.data(), lvlRank, lvlTypes, lvl2dim, *lvlEnumerator); + delete lvlEnumerator; return tensor; } template SparseTensorStorage::SparseTensorStorage( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo) - : SparseTensorStorage(dimSizes, perm, sparsity) { + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, bool initializeValuesIfAllDense) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, + lvl2dim) { // Provide hints on capacity of pointers and indices. // TODO: needs much fine-tuning based on actual sparsity; currently // we reserve pointer/index space based on all previous dense @@ -797,139 +1000,151 @@ // we should really use nnz and dense/sparse distribution. bool allDense = true; uint64_t sz = 1; - for (uint64_t r = 0, rank = getRank(); r < rank; ++r) { - if (isCompressedDim(r)) { - // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply + for (uint64_t l = 0; l < lvlRank; ++l) { + const DimLevelType dlt = lvlTypes[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + // TODO: Take a parameter between 1 and `lvlSizes[l]`, and multiply // `sz` by that before reserving. (For now we just use 1.) - pointers[r].reserve(sz + 1); - pointers[r].push_back(0); - indices[r].reserve(sz); + pointers[l].reserve(sz + 1); + pointers[l].push_back(0); + indices[l].reserve(sz); sz = 1; allDense = false; - } else if (isSingletonDim(r)) { - indices[r].reserve(sz); + } else if (isSingletonDLT(dlt)) { + indices[l].reserve(sz); sz = 1; allDense = false; } else { // Dense dimension. - ASSERT_DENSE_DIM(r); - sz = detail::checkedMul(sz, getDimSizes()[r]); + ASSERT_DENSE_DLT(dlt); + sz = detail::checkedMul(sz, lvlSizes[l]); } } - // Then assign contents from coordinate scheme tensor if provided. - if (coo) { - // Ensure both preconditions of `fromCOO`. - assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch"); - coo->sort(); - // Now actually insert the `elements`. - const std::vector> &elements = coo->getElements(); - uint64_t nnz = elements.size(); - values.reserve(nnz); - fromCOO(elements, 0, nnz, 0); - } else if (allDense) { + if (allDense && initializeValuesIfAllDense) values.resize(sz, 0); - } +} + +template +SparseTensorStorage::SparseTensorStorage( // NOLINT + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, + lvlCOO.getDimSizes().data(), lvlTypes, lvl2dim, + false) { + assert(lvlRank == lvlCOO.getDimSizes().size() && "Level-rank mismatch"); + // Ensure the preconditions of `fromCOO`. (One is already ensured by + // using `lvlSizes = lvlCOO.getDimSizes()` in the ctor above.) + lvlCOO.sort(); + // Now actually insert the `elements`. + const auto &elements = lvlCOO.getElements(); + uint64_t nnz = elements.size(); + values.reserve(nnz); + fromCOO(elements, 0, nnz, 0); } template SparseTensorStorage::SparseTensorStorage( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity, const SparseTensorStorageBase &tensor) - : SparseTensorStorage(dimSizes, perm, sparsity) { - SparseTensorEnumeratorBase *enumerator; - tensor.newEnumerator(&enumerator, getRank(), perm); + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorEnumeratorBase &lvlEnumerator) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, + lvlEnumerator.getTrgSizes().data(), lvlTypes, + lvl2dim) { + assert(lvlRank == lvlEnumerator.getTrgRank() && "Level-rank mismatch"); { // Initialize the statistics structure. - SparseTensorNNZ nnz(getDimSizes(), getDimTypes()); - nnz.initialize(*enumerator); + SparseTensorNNZ nnz(getLvlSizes(), getLvlTypes()); + nnz.initialize(lvlEnumerator); // Initialize "pointers" overhead (and allocate "indices", "values"). - uint64_t parentSz = 1; // assembled-size (not dimension-size) of `r-1`. - for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDim(r)) { - pointers[r].reserve(parentSz + 1); - pointers[r].push_back(0); + uint64_t parentSz = 1; // assembled-size of the `(l - 1)`-level. + for (uint64_t l = 0; l < lvlRank; ++l) { + const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + pointers[l].reserve(parentSz + 1); + pointers[l].push_back(0); uint64_t currentPos = 0; - nnz.forallIndices(r, [this, ¤tPos, r](uint64_t n) { + nnz.forallIndices(l, [this, ¤tPos, l](uint64_t n) { currentPos += n; - appendPointer(r, currentPos); + appendPointer(l, currentPos); }); - assert(pointers[r].size() == parentSz + 1 && + assert(pointers[l].size() == parentSz + 1 && "Final pointers size doesn't match allocated size"); - // That assertion entails `assembledSize(parentSz, r)` - // is now in a valid state. That is, `pointers[r][parentSz]` + // That assertion entails `assembledSize(parentSz, l)` + // is now in a valid state. That is, `pointers[l][parentSz]` // equals the present value of `currentPos`, which is the - // correct assembled-size for `indices[r]`. + // correct assembled-size for `indices[l]`. } // Update assembled-size for the next iteration. - parentSz = assembledSize(parentSz, r); - // Ideally we need only `indices[r].reserve(parentSz)`, however + parentSz = assembledSize(parentSz, l); + // Ideally we need only `indices[l].reserve(parentSz)`, however // the `std::vector` implementation forces us to initialize it too. // That is, in the yieldPos loop we need random-access assignment - // to `indices[r]`; however, `std::vector`'s subscript-assignment + // to `indices[l]`; however, `std::vector`'s subscript-assignment // only allows assigning to already-initialized positions. - if (isCompressedDim(r) || isSingletonDim(r)) - indices[r].resize(parentSz, 0); + if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) + indices[l].resize(parentSz, 0); else - ASSERT_DENSE_DIM(r); // Future-proofing. + ASSERT_DENSE_DLT(dlt); // Future-proofing. } values.resize(parentSz, 0); // Both allocate and zero-initialize. } // The yieldPos loop - enumerator->forallElements([this](const std::vector &ind, V val) { + lvlEnumerator.forallElements([this](const auto &lvlInd, V val) { uint64_t parentSz = 1, parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDim(r)) { + for (uint64_t lvlRank = getLvlRank(), l = 0; l < lvlRank; ++l) { + const auto dlt = getLvlTypes()[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { // If `parentPos == parentSz` then it's valid as an array-lookup; // however, it's semantically invalid here since that entry - // does not represent a segment of `indices[r]`. Moreover, that + // does not represent a segment of `indices[l]`. Moreover, that // entry must be immutable for `assembledSize` to remain valid. assert(parentPos < parentSz && "Pointers position is out of bounds"); - const uint64_t currentPos = pointers[r][parentPos]; + const uint64_t currentPos = pointers[l][parentPos]; // This increment won't overflow the `P` type, since it can't - // exceed the original value of `pointers[r][parentPos+1]` + // exceed the original value of `pointers[l][parentPos+1]` // which was already verified to be within bounds for `P` // when it was written to the array. - pointers[r][parentPos]++; - writeIndex(r, currentPos, ind[r]); + pointers[l][parentPos]++; + writeIndex(l, currentPos, lvlInd[l]); parentPos = currentPos; - } else if (isSingletonDim(r)) { - writeIndex(r, parentPos, ind[r]); + } else if (isSingletonDLT(dlt)) { + writeIndex(l, parentPos, lvlInd[l]); // the new parentPos equals the old parentPos. } else { // Dense dimension. - ASSERT_DENSE_DIM(r); - parentPos = parentPos * getDimSizes()[r] + ind[r]; + ASSERT_DENSE_DLT(dlt); + parentPos = parentPos * getLvlSizes()[l] + lvlInd[l]; } - parentSz = assembledSize(parentSz, r); + parentSz = assembledSize(parentSz, l); } assert(parentPos < values.size() && "Value position is out of bounds"); values[parentPos] = val; }); - // No longer need the enumerator, so we'll delete it ASAP. - delete enumerator; // The finalizeYieldPos loop - for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDim(r)) { - assert(parentSz == pointers[r].size() - 1 && + for (uint64_t parentSz = 1, l = 0; l < lvlRank; ++l) { + const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + assert(parentSz == pointers[l].size() - 1 && "Actual pointers size doesn't match the expected size"); // Can't check all of them, but at least we can check the last one. - assert(pointers[r][parentSz - 1] == pointers[r][parentSz] && + assert(pointers[l][parentSz - 1] == pointers[l][parentSz] && "Pointers got corrupted"); // TODO: optimize this by using `memmove` or similar. for (uint64_t n = 0; n < parentSz; ++n) { const uint64_t parentPos = parentSz - n; - pointers[r][parentPos] = pointers[r][parentPos - 1]; + pointers[l][parentPos] = pointers[l][parentPos - 1]; } - pointers[r][0] = 0; + pointers[l][0] = 0; } else { // Both dense and singleton are no-ops for the finalizeYieldPos loop. // This assertion is for future-proofing. - assert((isDenseDim(r) || isSingletonDim(r)) && - "Dimension is neither dense nor singleton"); + assert((isDenseDLT(dlt) || isSingletonDLT(dlt)) && + "Level is neither dense nor singleton"); } - parentSz = assembledSize(parentSz, r); + parentSz = assembledSize(parentSz, l); } } -#undef ASSERT_DENSE_DIM +#undef ASSERT_DENSE_DLT } // namespace sparse_tensor } // namespace mlir diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -56,12 +56,16 @@ /// kSparseToSparse STS STS, copied from the STS source /// kToIterator STS Iterator, call @getNext to use and /// @delSparseTensorIterator to free. -MLIR_CRUNNERUTILS_EXPORT void * -_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT - StridedMemRefType *sref, - StridedMemRefType *pref, - OverheadType ptrTp, OverheadType indTp, - PrimaryType valTp, Action action, void *ptr); +MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensor( // NOLINT + StridedMemRefType *dimSizesRef, + StridedMemRefType *lvlSizesRef, + StridedMemRefType *lvlTypesRef, + StridedMemRefType *lvl2dimRef, + StridedMemRefType *dim2lvlRef, OverheadType ptrTp, + OverheadType indTp, PrimaryType valTp, Action action, void *ptr); + +// TODO: document what all the arguments are/mean for the functions below, +// especially with regards to "dim"-vs-"lvl" and mappings/permutations. /// Tensor-storage method to obtain direct access to the values array. #define DECL_SPARSEVALUES(VNAME, V) \ @@ -130,8 +134,8 @@ // //===----------------------------------------------------------------------===// -/// Tensor-storage method to get the size of the given dimension. -MLIR_CRUNNERUTILS_EXPORT index_type sparseDimSize(void *tensor, index_type d); +/// Tensor-storage method to get the size of the given level. +MLIR_CRUNNERUTILS_EXPORT index_type sparseLvlSize(void *tensor, index_type l); /// Tensor-storage method to finalize lexicographic insertions. MLIR_CRUNNERUTILS_EXPORT void endInsert(void *tensor); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -57,14 +57,14 @@ operands); } -/// Generates dimension size call. -static Value genDimSizeCall(OpBuilder &builder, Location loc, +/// Generates call to lookup a level-size. +static Value genLvlSizeCall(OpBuilder &builder, Location loc, SparseTensorEncodingAttr &enc, Value src, - uint64_t idx) { + uint64_t lvl) { // Generate the call. - StringRef name = "sparseDimSize"; + StringRef name = "sparseLvlSize"; SmallVector params{ - src, constantIndex(builder, loc, toStoredDim(enc, idx))}; + src, constantIndex(builder, loc, toStoredDim(enc, lvl))}; Type iTp = builder.getIndexType(); return createFuncCall(builder, loc, name, iTp, params, EmitCInterface::Off) .getResult(0); @@ -72,13 +72,22 @@ /// Compute the size from type (for static sizes) or from an already-converted /// opaque pointer source (for dynamic sizes) at the given dimension. +// +// FIXME: Need to rename this function to match `genLvlSizeCall` and hence +// match the naming convention used in the runtime library. However, it's +// not entirely clear that all callsites of this function properly make the +// "level"-vs-"dimension" distinction; so need to audit each callsite to +// ensure this still does what they mean (possibly by having two separate +// functions, one for levels and one for dimensions). That also means +// renaming `sizesFromPtr`, `sizesFromType`, etc, to make clear whether +// they mean to be referring to level-sizes vs dimension-sizes. static Value sizeFromPtrAtDim(OpBuilder &builder, Location loc, SparseTensorEncodingAttr &enc, ShapedType stp, - Value src, unsigned dim) { + Value src, unsigned i) { auto shape = stp.getShape(); - if (shape[dim] == ShapedType::kDynamicSize) - return genDimSizeCall(builder, loc, enc, src, dim); - return constantIndex(builder, loc, shape[dim]); + if (shape[i] == ShapedType::kDynamicSize) + return genLvlSizeCall(builder, loc, enc, src, i); + return constantIndex(builder, loc, shape[i]); } /// Populates given sizes array from type (for static sizes) and from @@ -225,17 +234,19 @@ } private: - static constexpr unsigned kNumStaticParams = 6; + static constexpr unsigned kNumStaticParams = 8; static constexpr unsigned kNumDynamicParams = 2; static constexpr unsigned kNumParams = kNumStaticParams + kNumDynamicParams; - static constexpr unsigned kParamLvlTypes = 0; - static constexpr unsigned kParamDimSizes = 1; - static constexpr unsigned kParamDim2Lvl = 2; - static constexpr unsigned kParamPtrTp = 3; - static constexpr unsigned kParamIndTp = 4; - static constexpr unsigned kParamValTp = 5; - static constexpr unsigned kParamAction = 6; - static constexpr unsigned kParamPtr = 7; + static constexpr unsigned kParamDimSizes = 0; + static constexpr unsigned kParamLvlSizes = 1; + static constexpr unsigned kParamLvlTypes = 2; + static constexpr unsigned kParamLvl2Dim = 3; + static constexpr unsigned kParamDim2Lvl = 4; + static constexpr unsigned kParamPtrTp = 5; + static constexpr unsigned kParamIndTp = 6; + static constexpr unsigned kParamValTp = 7; + static constexpr unsigned kParamAction = 8; + static constexpr unsigned kParamPtr = 9; OpBuilder &builder; Location loc; @@ -260,10 +271,17 @@ // verification of external data, or for construction of internal data. assert(dimSizes.size() == dimRank && "Dimension-rank mismatch"); params[kParamDimSizes] = genBuffer(builder, loc, dimSizes); - // The dimension-to-level mapping. We must preinitialize `dim2lvl` - // so that the true branch below can perform random-access `operator[]` - // assignment. + // The level-sizes array must be passed as well, since for arbitrary + // dim2lvl mappings it cannot be trivially reconstructed at runtime. + // For now however, since we're still assuming permutations, we will + // initialize this parameter alongside the `dim2lvl` and `lvl2dim` + // parameters below. We preinitialize `lvlSizes` for code symmetry. + SmallVector lvlSizes(lvlRank); + // The dimension-to-level mapping and its inverse. We must preinitialize + // `dim2lvl` so that the true branch below can perform random-access + // `operator[]` assignment. We preinitialize `lvl2dim` for code symmetry. SmallVector dim2lvl(dimRank); + SmallVector lvl2dim(lvlRank); auto dimOrder = enc.getDimOrdering(); if (dimOrder) { assert(dimOrder.isPermutation()); @@ -271,13 +289,20 @@ // The `d`th source variable occurs in the `l`th result position. uint64_t d = dimOrder.getDimPosition(l); dim2lvl[d] = constantIndex(builder, loc, l); + lvl2dim[l] = constantIndex(builder, loc, d); + lvlSizes[l] = dimSizes[d]; } } else { assert(dimRank == lvlRank && "Rank mismatch"); - for (unsigned i = 0; i < lvlRank; i++) - dim2lvl[i] = constantIndex(builder, loc, i); + for (unsigned i = 0; i < lvlRank; i++) { + dim2lvl[i] = lvl2dim[i] = constantIndex(builder, loc, i); + lvlSizes[i] = dimSizes[i]; + } } - params[kParamDim2Lvl] = genBuffer(builder, loc, dim2lvl); + params[kParamLvlSizes] = genBuffer(builder, loc, lvlSizes); + params[kParamLvl2Dim] = genBuffer(builder, loc, lvl2dim); + params[kParamDim2Lvl] = + dimOrder ? genBuffer(builder, loc, dim2lvl) : params[kParamLvl2Dim]; // Secondary and primary types encoding. setTemplateTypes(enc, stp); // Finally, make note that initialization is complete. @@ -316,9 +341,10 @@ /// if val != 0 /// t->add(&val, [i1,..,ik], [p1,..,pk]); static void genAddEltCall(OpBuilder &builder, Location loc, Type eltType, - Value ptr, Value valPtr, Value ind, Value perm) { + Value lvlCOO, Value valPtr, Value dimInd, + Value dim2lvl) { SmallString<9> name{"addElt", primaryTypeFunctionSuffix(eltType)}; - SmallVector params{ptr, valPtr, ind, perm}; + SmallVector params{lvlCOO, valPtr, dimInd, dim2lvl}; Type pTp = getOpaquePointerType(builder); createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On); } @@ -637,7 +663,7 @@ Value src = adaptor.getOperands()[0]; int64_t idx = *index; rewriter.replaceOp(op, - genDimSizeCall(rewriter, op->getLoc(), enc, src, idx)); + genLvlSizeCall(rewriter, op->getLoc(), enc, src, idx)); return success(); } }; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -8,6 +8,7 @@ add_mlir_library(MLIRSparseTensorRuntime File.cpp NNZ.cpp + PermutationRef.cpp Storage.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp @@ -47,18 +47,24 @@ } } -// TODO(wrengr/bixia): figure out how to reorganize the element-parsing -// loop of `openSparseTensorCOO` into methods of this class, so we can -// avoid leaking access to the `line` pointer (both for general hygiene -// and because we can't mark it const due to the second argument of -// `strtoul`/`strtoud` being `char * *restrict` rather than -// `char const* *restrict`). -// /// Attempts to read a line from the file. -char *SparseTensorReader::readLine() { - if (fgets(line, kColWidth, file)) - return line; - MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); +void SparseTensorReader::readLine() { + if (!fgets(line, kColWidth, file)) + MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); +} + +char *SparseTensorReader::readCOOIndices(uint64_t rank, uint64_t *indices) { + assert(rank == getRank() && "Rank mismatch"); + readLine(); + // Local variable for tracking the parser's position in the `line` buffer. + char *linePtr = line; + for (uint64_t r = 0; r < rank; ++r) { + // Parse the 1-based index. + uint64_t idx = strtoul(linePtr, &linePtr, 10); + // Store the 0-based index. + indices[r] = idx - 1; + } + return linePtr; } /// Reads and parses the file's header. diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp @@ -21,27 +21,21 @@ using namespace mlir::sparse_tensor; //===----------------------------------------------------------------------===// -/// Allocate the statistics structure for the desired sizes and -/// sparsity (in the target tensor's storage-order). This constructor -/// does not actually populate the statistics, however; for that see -/// `initialize`. -/// -/// Precondition: `dimSizes` must not contain zeros. -SparseTensorNNZ::SparseTensorNNZ(const std::vector &dimSizes, - const std::vector &sparsity) - : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) { - assert(dimSizes.size() == dimTypes.size() && "Rank mismatch"); +SparseTensorNNZ::SparseTensorNNZ(const std::vector &lvlSizes, + const std::vector &lvlTypes) + : lvlSizes(lvlSizes), lvlTypes(lvlTypes), nnz(getLvlRank()) { + assert(lvlSizes.size() == lvlTypes.size() && "Rank mismatch"); bool alreadyCompressed = false; (void)alreadyCompressed; - uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`. - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { - const DimLevelType dlt = sparsity[r]; + uint64_t sz = 1; // the product of all `lvlSizes` strictly less than `l`. + for (uint64_t l = 0, lvlrank = getLvlRank(); l < lvlrank; ++l) { + const DimLevelType dlt = lvlTypes[l]; if (isCompressedDLT(dlt)) { if (alreadyCompressed) MLIR_SPARSETENSOR_FATAL( - "Multiple compressed layers not currently supported"); + "Multiple compressed levels not currently supported"); alreadyCompressed = true; - nnz[r].resize(sz, 0); // Both allocate and zero-initialize. + nnz[l].resize(sz, 0); // Both allocate and zero-initialize. } else if (isDenseDLT(dlt)) { if (alreadyCompressed) MLIR_SPARSETENSOR_FATAL( @@ -52,51 +46,41 @@ // when adding support for multiple compressed dimensions or // for dense-after-compressed. } else { - MLIR_SPARSETENSOR_FATAL("unsupported dimension level type: %d\n", + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", static_cast(dlt)); } - sz = detail::checkedMul(sz, dimSizes[r]); + sz = detail::checkedMul(sz, lvlSizes[l]); } } -/// Lexicographically enumerates all indicies for dimensions strictly -/// less than `stopDim`, and passes their nnz statistic to the callback. -/// Since our use-case only requires the statistic not the coordinates -/// themselves, we do not bother to construct those coordinates. -void SparseTensorNNZ::forallIndices(uint64_t stopDim, +void SparseTensorNNZ::forallIndices(uint64_t stopLvl, SparseTensorNNZ::NNZConsumer yield) const { - assert(stopDim < getRank() && "Dimension out of bounds"); - assert(isCompressedDLT(dimTypes[stopDim]) && - "Cannot look up non-compressed dimensions"); - forallIndices(yield, stopDim, 0, 0); + assert(stopLvl < getLvlRank() && "Level out of bounds"); + assert(isCompressedDLT(lvlTypes[stopLvl]) && + "Cannot look up non-compressed levels"); + forallIndices(yield, stopLvl, 0, 0); } -/// Adds a new element (i.e., increment its statistics). We use -/// a method rather than inlining into the lambda in `initialize`, -/// to avoid spurious templating over `V`. And this method is private -/// to avoid needing to re-assert validity of `ind` (which is guaranteed -/// by `forallElements`). -void SparseTensorNNZ::add(const std::vector &ind) { +void SparseTensorNNZ::add(const std::vector &lvlInd) { uint64_t parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDLT(dimTypes[r])) - nnz[r][parentPos]++; - parentPos = parentPos * dimSizes[r] + ind[r]; + for (uint64_t l = 0, lvlrank = getLvlRank(); l < lvlrank; ++l) { + if (isCompressedDLT(lvlTypes[l])) + nnz[l][parentPos]++; + parentPos = parentPos * lvlSizes[l] + lvlInd[l]; } } -/// Recursive component of the public `forallIndices`. void SparseTensorNNZ::forallIndices(SparseTensorNNZ::NNZConsumer yield, - uint64_t stopDim, uint64_t parentPos, - uint64_t d) const { - assert(d <= stopDim); - if (d == stopDim) { - assert(parentPos < nnz[d].size() && "Cursor is out of range"); - yield(nnz[d][parentPos]); + uint64_t stopLvl, uint64_t parentPos, + uint64_t l) const { + assert(l <= stopLvl); + if (l == stopLvl) { + assert(parentPos < nnz[l].size() && "Cursor is out of range"); + yield(nnz[l][parentPos]); } else { - const uint64_t sz = dimSizes[d]; + const uint64_t sz = lvlSizes[l]; const uint64_t pstart = parentPos * sz; - for (uint64_t i = 0; i < sz; i++) - forallIndices(yield, stopDim, pstart + i, d + 1); + for (uint64_t i = 0; i < sz; ++i) + forallIndices(yield, stopLvl, pstart + i, l + 1); } } diff --git a/mlir/lib/ExecutionEngine/SparseTensor/PermutationRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/PermutationRef.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/PermutationRef.cpp @@ -0,0 +1,38 @@ +//===- PermutationRef.cpp - Permutation reference wrapper -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" + +#include +#include +#include + +bool mlir::sparse_tensor::detail::isPermutation(uint64_t size, + const uint64_t *perm) { + assert(perm && "Got nullptr for permutation"); + // TODO: If we ever depend on LLVMSupport, then use `llvm::BitVector` instead. + std::vector seen(size, false); + for (uint64_t i = 0; i < size; ++i) { + const uint64_t j = perm[i]; + if (j >= size || seen[j]) + return false; + seen[j] = true; + } + for (uint64_t i = 0; i < size; ++i) + if (!seen[i]) + return false; + return true; +} + +std::vector +mlir::sparse_tensor::detail::PermutationRef::inverse() const { + std::vector out(permSize); + for (uint64_t i = 0; i < permSize; ++i) + out[perm[i]] = i; + return out; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp @@ -12,9 +12,6 @@ // no benefit). Though this also helps ensure that we avoid weak-vtables: // // -// (This file also contains the definition of `assertPermutedSizesMatchShape` -// which is used by `SparseTensorStorage` factories.) -// // This file is part of the lightweight runtime support library for sparse // tensor manipulations. The functionality of the support library is meant // to simplify benchmarking, testing, and debugging MLIR code operating on @@ -27,25 +24,39 @@ using namespace mlir::sparse_tensor; -SparseTensorStorageBase::SparseTensorStorageBase( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity) - : dimSizes(dimSizes), rev(getRank()), - dimTypes(sparsity, sparsity + getRank()) { - assert(perm && "Got nullptr for permutation"); - assert(sparsity && "Got nullptr for sparsity"); - const uint64_t rank = getRank(); - // Validate parameters. - assert(rank > 0 && "Trivial shape is unsupported"); - for (uint64_t r = 0; r < rank; ++r) { - assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - assert((isDenseDim(r) || isCompressedDim(r) || isSingletonDim(r)) && - "Unsupported DimLevelType"); +SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim) + : dimSizes(dimSizes, dimSizes + dimRank), + lvlSizes(lvlSizes, lvlSizes + lvlRank), + lvlTypes(lvlTypes, lvlTypes + lvlRank), + lvl2dim(lvl2dim, lvl2dim + lvlRank) { + // TODO: If we do get any nullptrs, I'm pretty sure these assertions + // will run too late (i.e., after copying things into vectors above). + // But since those fields are const I'm not sure there's any clean way + // to assert things before copying... + assert(dimSizes && "Got nullptr for dimension sizes"); + assert(lvlSizes && "Got nullptr for level sizes"); + assert(lvlTypes && "Got nullptr for level types"); + assert(lvl2dim && "Got nullptr for level-to-dimension mapping"); + // Validate dim-indexed parameters. + assert(dimRank > 0 && "Trivial shape is unsupported"); + for (uint64_t d = 0; d < dimRank; ++d) + assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage"); + // Validate level-indexed parameters. + assert(lvlRank > 0 && "Trivial shape is unsupported"); + for (uint64_t l = 0; l < lvlRank; ++l) { + assert(lvlSizes[l] > 0 && "Level size zero has trivial storage"); + const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. + // We use `MLIR_SPARSETENSOR_FATAL` here instead of `assert` so that + // when this ctor is successful then all the methods can rely on the + // fact that each level-type satisfies one of these options (even + // when `NDEBUG` is true), thereby reducing the need to re-assert things. + if (!(isDenseDLT(dlt) || isCompressedDLT(dlt) || isSingletonDLT(dlt))) + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", + static_cast(dlt)); } - // Construct the "reverse" (i.e., inverse) permutation. - // TODO: should move this computation off to the codegen - for (uint64_t r = 0; r < rank; ++r) - rev[perm[r]] = r; } // Helper macro for generating error messages when some @@ -56,7 +67,8 @@ #define IMPL_NEWENUMERATOR(VNAME, V) \ void SparseTensorStorageBase::newEnumerator( \ - SparseTensorEnumeratorBase **, uint64_t, const uint64_t *) const { \ + SparseTensorEnumeratorBase **, uint64_t, const uint64_t *, uint64_t, \ + const uint64_t *) const { \ FATAL_PIV("newEnumerator" #VNAME); \ } MLIR_SPARSETENSOR_FOREVERY_V(IMPL_NEWENUMERATOR) @@ -99,19 +111,3 @@ #undef IMPL_EXPINSERT #undef FATAL_PIV - -// TODO: try to unify this with `SparseTensorReader::assertMatchesShape` -// (which is used by `openSparseTensorCOO`). It's easy enough to resolve -// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier -// to resolve the presence/absence of `perm` (without introducing extra -// overhead), so perhaps the code duplication is unavoidable? -void mlir::sparse_tensor::detail::assertPermutedSizesMatchShape( - const std::vector &dimSizes, uint64_t rank, const uint64_t *perm, - const uint64_t *shape) { - assert(perm && "Got nullptr for permutation"); - assert(shape && "Got nullptr for shape"); - assert(rank == dimSizes.size() && "Rank mismatch"); - for (uint64_t d = 0; d < rank; ++d) - assert((shape[d] == 0 || shape[d] == dimSizes[perm[d]]) && - "Dimension size mismatch"); -} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -53,8 +53,10 @@ #include "mlir/ExecutionEngine/SparseTensor/COO.h" #include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" #include "mlir/ExecutionEngine/SparseTensor/File.h" +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" +#include #include using namespace mlir::sparse_tensor; @@ -106,46 +108,59 @@ const typename SparseTensorCOO::const_iterator end; }; +// TODO: When using this library from MLIR, the `toMLIRSparseTensor`/ +// `IMPL_CONVERTTOMLIRSPARSETENSOR` and `fromMLIRSparseTensor`/ +// `IMPL_CONVERTFROMMLIRSPARSETENSOR` constructs will be codegened away; +// therefore, these functions are only used by PyTACO, one place in the +// Python integration tests, and possibly by out-of-tree projects. +// This is notable because neither function can be easily generalized +// to handle non-permutations. In particular, while we could adjust +// the functions to take all the arguments they'd need, that would just +// push the problem into client code. So if we want to generalize these +// functions to support non-permutations, we'll need to figure out how +// to do so without putting undue burden on clients. + /// Initializes sparse tensor from an external COO-flavored format. +/// The `rank` argument is both dimension-rank and level-rank, and the +/// `dim2lvl` argument must be a permutation. /// Used by `IMPL_CONVERTTOMLIRSPARSETENSOR`. +// // TODO: generalize beyond 64-bit indices. template static SparseTensorStorage * -toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *shape, - const V *values, const uint64_t *indices, - const uint64_t *perm, const DimLevelType *sparsity) { +toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *dimSizes, + const V *values, const uint64_t *dimIndices, + const uint64_t *dim2lvl, const DimLevelType *lvlTypes) { #ifndef NDEBUG - // Verify that perm is a permutation of 0..(rank-1). - std::vector seen(rank, false); - for (uint64_t i = 0; i < rank; ++i) { - const uint64_t j = perm[i]; - if (j >= rank || seen[j]) - MLIR_SPARSETENSOR_FATAL("Not a permutation of 0..%" PRIu64 "\n", rank); - seen[j] = true; - } - // Verify that the sparsity values are supported. // TODO: update this check to match what we actually support. for (uint64_t i = 0; i < rank; ++i) - if (sparsity[i] != DimLevelType::Dense && - sparsity[i] != DimLevelType::Compressed) - MLIR_SPARSETENSOR_FATAL("unsupported dimension level type: %d\n", - static_cast(sparsity[i])); + if (lvlTypes[i] != DimLevelType::Dense && + lvlTypes[i] != DimLevelType::Compressed) + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", + static_cast(lvlTypes[i])); #endif - + // Verify that `dim2lvl` is a permutation of `[0..(rank-1)]`. + // NOTE: The construction of `lvlSizes` and `lvl2dim` don't generalize + // to arbitrary `dim2lvl` mappings. Whereas constructing `lvlInd` from + // `dimInd` does (though the details would have to be updated, just + // like for `IMPL_ADDELT`). + detail::PermutationRef d2l(rank, dim2lvl); // Convert external format to internal COO. - auto *coo = SparseTensorCOO::newSparseTensorCOO(rank, shape, perm, nse); - std::vector idx(rank); - for (uint64_t i = 0, base = 0; i < nse; i++) { - for (uint64_t r = 0; r < rank; r++) - idx[perm[r]] = indices[base + r]; - coo->add(idx, values[i]); - base += rank; + auto lvlSizes = d2l.pushforward(rank, dimSizes); + auto *lvlCOO = new SparseTensorCOO(lvlSizes, nse); + std::vector lvlInd(rank); + const uint64_t *dimInd = dimIndices; + for (uint64_t i = 0; i < nse; ++i) { + d2l.pushforward(rank, dimInd, lvlInd.data()); + lvlCOO->add(lvlInd, values[i]); + dimInd += rank; } // Return sparse tensor storage format as opaque pointer. - auto *tensor = SparseTensorStorage::newSparseTensor( - rank, shape, perm, sparsity, coo); - delete coo; + auto lvl2dim = d2l.inverse(); + auto *tensor = SparseTensorStorage::newFromCOO( + rank, dimSizes, rank, lvlTypes, lvl2dim.data(), *lvlCOO); + delete lvlCOO; return tensor; } @@ -164,30 +179,34 @@ uint64_t *pRank, uint64_t *pNse, uint64_t **pShape, V **pValues, uint64_t **pIndices) { assert(tensor && "Received nullptr for tensor"); - uint64_t rank = tensor->getRank(); - std::vector perm(rank); - std::iota(perm.begin(), perm.end(), 0); - SparseTensorCOO *coo = tensor->toCOO(perm.data()); + uint64_t dimRank = tensor->getDimRank(); + const auto &dimSizes = tensor->getDimSizes(); + std::vector identityPerm(dimRank); + std::iota(identityPerm.begin(), identityPerm.end(), 0); + SparseTensorCOO *coo = + tensor->toCOO(dimRank, dimSizes.data(), dimRank, identityPerm.data()); const std::vector> &elements = coo->getElements(); uint64_t nse = elements.size(); - uint64_t *shape = new uint64_t[rank]; - for (uint64_t i = 0; i < rank; i++) - shape[i] = coo->getDimSizes()[i]; + const auto &cooSizes = coo->getDimSizes(); + assert(cooSizes.size() == dimRank && "Rank mismatch"); + uint64_t *shape = new uint64_t[dimRank]; + std::memcpy((void *)shape, (void *)cooSizes.data(), + sizeof(uint64_t) * dimRank); V *values = new V[nse]; - uint64_t *indices = new uint64_t[rank * nse]; + uint64_t *indices = new uint64_t[dimRank * nse]; - for (uint64_t i = 0, base = 0; i < nse; i++) { + for (uint64_t i = 0, base = 0; i < nse; ++i) { values[i] = elements[i].value; - for (uint64_t j = 0; j < rank; j++) - indices[base + j] = elements[i].indices[j]; - base += rank; + for (uint64_t d = 0; d < dimRank; ++d) + indices[base + d] = elements[i].indices[d]; + base += dimRank; } delete coo; - *pRank = rank; + *pRank = dimRank; *pNse = nse; *pShape = shape; *pValues = values; @@ -207,36 +226,44 @@ #define CASE(p, i, v, P, I, V) \ if (ptrTp == (p) && indTp == (i) && valTp == (v)) { \ - SparseTensorCOO *coo = nullptr; \ - if (action <= Action::kFromCOO) { \ - if (action == Action::kFromFile) { \ - char *filename = static_cast(ptr); \ - coo = openSparseTensorCOO(filename, rank, shape, perm, v); \ - } else if (action == Action::kFromCOO) { \ - coo = static_cast *>(ptr); \ - } else { \ - assert(action == Action::kEmpty); \ - } \ - auto *tensor = SparseTensorStorage::newSparseTensor( \ - rank, shape, perm, sparsity, coo); \ - if (action == Action::kFromFile) \ - delete coo; \ - return tensor; \ + switch (action) { \ + case Action::kEmpty: \ + return SparseTensorStorage::newEmpty( \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, lvl2dim); \ + case Action::kFromFile: { \ + char *filename = static_cast(ptr); \ + return openSparseTensor(dimRank, dimSizes, lvlRank, lvlTypes, \ + lvl2dim, dim2lvl, filename, v); \ } \ - if (action == Action::kSparseToSparse) { \ - auto *tensor = static_cast(ptr); \ - return SparseTensorStorage::newSparseTensor(rank, shape, perm, \ - sparsity, tensor); \ + case Action::kFromCOO: { \ + assert(ptr && "Received nullptr for SparseTensorCOO object"); \ + auto &coo = *static_cast *>(ptr); \ + return SparseTensorStorage::newFromCOO( \ + dimRank, dimSizes, lvlRank, lvlTypes, lvl2dim, coo); \ } \ - if (action == Action::kEmptyCOO) \ - return SparseTensorCOO::newSparseTensorCOO(rank, shape, perm); \ - coo = static_cast *>(ptr)->toCOO(perm); \ - if (action == Action::kToIterator) { \ + case Action::kSparseToSparse: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast(ptr); \ + return SparseTensorStorage::newFromSparseTensor( \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, lvl2dim, dimRank, \ + dim2lvl, tensor); \ + } \ + case Action::kEmptyCOO: \ + return new SparseTensorCOO(lvlRank, lvlSizes); \ + case Action::kToCOO: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast *>(ptr); \ + return tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl); \ + } \ + case Action::kToIterator: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast *>(ptr); \ + auto *coo = tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl); \ return new SparseTensorIterator(coo); \ - } else { \ - assert(action == Action::kToCOO); \ } \ - return coo; \ + } \ + MLIR_SPARSETENSOR_FATAL("unknown action: %d\n", \ + static_cast(action)); \ } #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) @@ -247,20 +274,32 @@ static_assert(std::is_same::value, "Expected index_type == uint64_t"); -void * -_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT - StridedMemRefType *sref, - StridedMemRefType *pref, - OverheadType ptrTp, OverheadType indTp, - PrimaryType valTp, Action action, void *ptr) { - assert(aref && sref && pref); - assert(aref->strides[0] == 1 && sref->strides[0] == 1 && - pref->strides[0] == 1); - assert(aref->sizes[0] == sref->sizes[0] && sref->sizes[0] == pref->sizes[0]); - const DimLevelType *sparsity = aref->data + aref->offset; - const index_type *shape = sref->data + sref->offset; - const index_type *perm = pref->data + pref->offset; - uint64_t rank = aref->sizes[0]; +// TODO: this swiss-army-knife should be split up into separate functions +// for each action, since the various actions don't agree on (1) whether +// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" +// arguments are actually storage-levels vs target tensor-dimensions, +// (3) whether all the arguments are actually used/required. +void *_mlir_ciface_newSparseTensor( // NOLINT + StridedMemRefType *dimSizesRef, + StridedMemRefType *lvlSizesRef, + StridedMemRefType *lvlTypesRef, + StridedMemRefType *lvl2dimRef, + StridedMemRefType *dim2lvlRef, OverheadType ptrTp, + OverheadType indTp, PrimaryType valTp, Action action, void *ptr) { + assert(dimSizesRef && dimSizesRef->strides[0] == 1); + assert(lvlSizesRef && lvlSizesRef->strides[0] == 1); + assert(lvlTypesRef && lvlTypesRef->strides[0] == 1); + assert(lvl2dimRef && lvl2dimRef->strides[0] == 1); + assert(dim2lvlRef && dim2lvlRef->strides[0] == 1); + const uint64_t dimRank = dimSizesRef->sizes[0]; + const uint64_t lvlRank = lvlSizesRef->sizes[0]; + assert(dim2lvlRef->sizes[0] == dimRank); + assert(lvlTypesRef->sizes[0] == lvlRank && lvl2dimRef->sizes[0] == lvlRank); + const index_type *dimSizes = dimSizesRef->data + dimSizesRef->offset; + const index_type *lvlSizes = lvlSizesRef->data + lvlSizesRef->offset; + const DimLevelType *lvlTypes = lvlTypesRef->data + lvlTypesRef->offset; + const index_type *lvl2dim = lvl2dimRef->data + lvl2dimRef->offset; + const index_type *dim2lvl = dim2lvlRef->data + dim2lvlRef->offset; // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -415,22 +454,26 @@ #undef IMPL_SPARSEINDICES #undef IMPL_GETOVERHEAD +// TODO: while this API design will work for arbitrary dim2lvl mappings, +// we should probably move the `dimInd`-to-`lvlInd` computation into codegen +// (since that could enable optimizations to remove the intermediate memref). #define IMPL_ADDELT(VNAME, V) \ - void *_mlir_ciface_addElt##VNAME(void *coo, StridedMemRefType *vref, \ - StridedMemRefType *iref, \ - StridedMemRefType *pref) { \ - assert(coo &&vref &&iref &&pref); \ - assert(iref->strides[0] == 1 && pref->strides[0] == 1); \ - assert(iref->sizes[0] == pref->sizes[0]); \ - const index_type *indx = iref->data + iref->offset; \ - const index_type *perm = pref->data + pref->offset; \ - uint64_t isize = iref->sizes[0]; \ - std::vector indices(isize); \ - for (uint64_t r = 0; r < isize; r++) \ - indices[perm[r]] = indx[r]; \ + void *_mlir_ciface_addElt##VNAME( \ + void *lvlCOO, StridedMemRefType *vref, \ + StridedMemRefType *dimIndRef, \ + StridedMemRefType *dim2lvlRef) { \ + assert(lvlCOO &&vref &&dimIndRef &&dim2lvlRef); \ + assert(dimIndRef->strides[0] == 1 && dim2lvlRef->strides[0] == 1); \ + const uint64_t rank = dimIndRef->sizes[0]; \ + assert(dim2lvlRef->sizes[0] == rank); \ + const index_type *dimInd = dimIndRef->data + dimIndRef->offset; \ + const index_type *dim2lvl = dim2lvlRef->data + dim2lvlRef->offset; \ + std::vector lvlInd(rank); \ + for (uint64_t d = 0; d < rank; ++d) \ + lvlInd[dim2lvl[d]] = dimInd[d]; \ V *value = vref->data + vref->offset; \ - static_cast *>(coo)->add(indices, *value); \ - return coo; \ + static_cast *>(lvlCOO)->add(lvlInd, *value); \ + return lvlCOO; \ } MLIR_SPARSETENSOR_FOREVERY_V(IMPL_ADDELT) #undef IMPL_ADDELT @@ -498,8 +541,8 @@ // //===----------------------------------------------------------------------===// -index_type sparseDimSize(void *tensor, index_type d) { - return static_cast(tensor)->getDimSize(d); +index_type sparseLvlSize(void *tensor, index_type x) { + return static_cast(tensor)->getLvlSize(x); } void endInsert(void *tensor) { diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -40,7 +40,7 @@ // CHECK-LABEL: func @sparse_dim1d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[D:.*]] = call @sparseDimSize(%[[A]], %[[C]]) +// CHECK: %[[D:.*]] = call @sparseLvlSize(%[[A]], %[[C]]) // CHECK: return %[[D]] : index func.func @sparse_dim1d(%arg0: tensor) -> index { %c = arith.constant 0 : index @@ -51,7 +51,7 @@ // CHECK-LABEL: func @sparse_dim3d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 2 : index -// CHECK: %[[D:.*]] = call @sparseDimSize(%[[A]], %[[C]]) +// CHECK: %[[D:.*]] = call @sparseLvlSize(%[[A]], %[[C]]) // CHECK: return %[[D]] : index func.func @sparse_dim3d(%arg0: tensor) -> index { // Querying for dimension 1 in the tensor type needs to be @@ -78,13 +78,15 @@ // CHECK-LABEL: func @sparse_new1d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[FromFile:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<1xi8> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<1xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor<128xf64, #SparseVector> @@ -94,13 +96,15 @@ // CHECK-LABEL: func @sparse_new2d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[FromFile:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<2xi8> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor @@ -110,13 +114,17 @@ // CHECK-LABEL: func @sparse_new3d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[FromFile:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<3xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<3xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<3xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor @@ -129,16 +137,18 @@ // CHECK-DAG: %[[Empty:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I]], %[[Q]][%[[C0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[J]], %[[Q]][%[[C1]]] : memref<2xindex> +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<2xi8> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I]], %[[DimSizes0]][%[[C0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[J]], %[[DimSizes0]][%[[C1]]] : memref<2xindex> // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[Empty]], %[[NP]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[Empty]], %[[NP]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_init(%arg0: index, %arg1: index) -> tensor { %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor @@ -350,7 +360,7 @@ // CHECK-LABEL: func @sparse_expansion3( // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[N:.*]] = call @newSparseTensor -// CHECK: %[[S:.*]] = call @sparseDimSize(%[[N]], %c1) : (!llvm.ptr, index) -> index +// CHECK: %[[S:.*]] = call @sparseLvlSize(%[[N]], %c1) : (!llvm.ptr, index) -> index // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref @@ -396,7 +406,7 @@ // CHECK-SAME: %[[B:.*]]: !llvm.ptr) // CHECK-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 // CHECK-DAG: %[[Sort:.*]] = arith.constant false -// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) // CHECK: call @outSparseTensorF64(%[[COO]], %[[B]], %[[Sort]]) : (!llvm.ptr, !llvm.ptr, i1) -> () // CHECK: call @delSparseTensorCOOF64(%[[COO]]) // CHECK: return @@ -410,7 +420,7 @@ // CHECK-SAME: %[[B:.*]]: !llvm.ptr) // CHECK-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 // CHECK-DAG: %[[Sort:.*]] = arith.constant true -// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) // CHECK: call @outSparseTensorF32(%[[COO]], %[[B]], %[[Sort]]) : (!llvm.ptr, !llvm.ptr, i1) -> () // CHECK: call @delSparseTensorCOOF32(%[[COO]]) // CHECK: return diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -23,14 +23,16 @@ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[U:.*]] = tensor.dim %[[A]], %[[C0]] : tensor -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<1xindex> // CHECK: %[[T:.*]] = memref.cast %[[M]] : memref<1xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -40,10 +42,10 @@ // CHECK: scf.if %[[N]] { // CHECK: memref.store %[[I]], %[[M]][%[[C0]]] : memref<1xindex> // CHECK: memref.store %[[E]], %[[BUF]][] : memref -// CHECK: call @addEltI32(%[[C]], %[[BUF]], %[[T]], %[[Z]]) +// CHECK: call @addEltI32(%[[C]], %[[BUF]], %[[T]], %[[IotaP]]) // CHECK: } // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOI32(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_convert_1d(%arg0: tensor) -> tensor { @@ -79,14 +81,16 @@ // CHECK-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[T:.*]] = memref.cast %[[M]] : memref<2xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -96,10 +100,10 @@ // CHECK: memref.store %[[I]], %[[M]][%[[C0]]] : memref<2xindex> // CHECK: memref.store %[[J]], %[[M]][%[[C1]]] : memref<2xindex> // CHECK: memref.store %[[E]], %[[BUF]][] : memref -// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[T]], %[[Z]]) +// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[T]], %[[IotaP]]) // CHECK: } // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOF64(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr @@ -140,14 +144,16 @@ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[N:.*]] = memref.cast %[[M]] : memref<2xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -156,9 +162,9 @@ // CHECK-DAG: memref.store %{{.*}}, %[[M]][%[[C1]]] : memref<2xindex> // CHECK-DAG: %[[V:.*]] = tensor.extract %{{.*}}[%[[I]]] : tensor<2xf32> // CHECK: memref.store %[[V]], %[[BUF]][] : memref -// CHECK: call @addEltF32(%{{.*}}, %[[BUF]], %[[N]], %{{.*}}) +// CHECK: call @addEltF32(%{{.*}}, %[[BUF]], %[[N]], %[[IotaP]]) // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOF32(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr @@ -206,14 +212,18 @@ // CHECK-DAG: %[[U1:.*]] = tensor.dim %[[A]], %[[C0]] : tensor // CHECK-DAG: %[[U2:.*]] = tensor.dim %[[A]], %[[C1]] : tensor // CHECK-DAG: %[[U3:.*]] = tensor.dim %[[A]], %[[C2]] : tensor -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<3xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<3xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<3xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<3xindex> to memref +// CHECK-DAG: %[[Lvl2DimP:.*]] = memref.cast %[[Lvl2Dim]] : memref<3xindex> to memref +// CHECK-DAG: %[[Dim2LvlP:.*]] = memref.cast %[[Dim2Lvl]] : memref<3xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[Lvl2DimP]], %[[Dim2LvlP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<3xindex> // CHECK: %[[N:.*]] = memref.cast %[[M]] : memref<3xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -225,11 +235,11 @@ // CHECK: memref.store %[[J]], %[[M]][%[[C1]]] : memref<3xindex> // CHECK: memref.store %[[K]], %[[M]][%[[C2]]] : memref<3xindex> // CHECK: memref.store %[[E]], %[[BUF]][] : memref -// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[N]], %[[Z]]) +// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[N]], %[[Dim2LvlP]]) // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[Lvl2DimP]], %[[Dim2LvlP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOF64(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_convert_3d(%arg0: tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -20,24 +20,28 @@ // CHECK-SAME: %[[Arg:.*]]: !llvm.ptr) -> tensor<13xi32> // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I13:.*]] = arith.constant 13 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<1xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<1xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<1xindex> to memref -// CHECK-DAG: memref.store %[[I13]], %[[SizesS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[zeroI32:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[ElemTpActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[zeroI32]], %[[zeroI32]], %[[ElemTpActionToIter]], %[[ElemTpActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : i32 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: memref.store %[[I13]], %[[DimSizes]][%[[I0]]] : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<1xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[C0]], %[[C0]], %[[C6]], %[[C6]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32> -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref<13xi32>) +// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref<13xi32>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -57,25 +61,29 @@ // CHECK-LABEL: func @sparse_convert_1d_dyn( // CHECK-SAME: %[[Arg:.*]]: !llvm.ptr) -> tensor // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<1xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<1xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<1xindex> to memref -// CHECK-DAG: %[[SizeI0:.*]] = call @sparseDimSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[SizeI0]], %[[SizesS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[zeroI32:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[ElemTpActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[zeroI32]], %[[zeroI32]], %[[ElemTpActionToIter]], %[[ElemTpActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : i32 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[SizeI0:.*]] = call @sparseLvlSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[SizeI0]], %[[DimSizes]][%[[I0]]] : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<1xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[C0]], %[[C0]], %[[C6]], %[[C6]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -98,26 +106,30 @@ // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I2]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I4]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -153,27 +165,31 @@ // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: %[[SizeI0:.*]] = call @sparseDimSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[SizeI0]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[SizeI0:.*]] = call @sparseLvlSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[SizeI0]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I4]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -197,27 +213,31 @@ // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: %[[SizeI1:.*]] = call @sparseDimSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[SizeI1]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[SizeI1:.*]] = call @sparseLvlSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[I2]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[SizeI1]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -240,28 +260,32 @@ // CHECK-SAME: %[[Arg:.*]]: !llvm.ptr) -> tensor // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: %[[SizeI0:.*]] = call @sparseDimSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: %[[SizeI1:.*]] = call @sparseDimSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[SizeI0]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[SizeI1]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[SizeI0:.*]] = call @sparseLvlSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: %[[SizeI1:.*]] = call @sparseLvlSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[SizeI0]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[SizeI1]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -303,29 +327,33 @@ // CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[I3:.*]] = arith.constant 3 : index // CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<3xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<3xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<3xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I2]]] : memref<3xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<3xindex> to memref -// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I3]], %[[SizesS]][%[[I1]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I2]]] : memref<3xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<3xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I2]], %[[PermS]][%[[I2]]] : memref<3xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<3xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<3xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<3xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I2]]] : memref<3xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<3xindex> to memref +// CHECK-DAG: memref.store %[[I2]], %[[DimSizes]][%[[I0]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I3]], %[[DimSizes]][%[[I1]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I4]], %[[DimSizes]][%[[I2]]] : memref<3xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<3xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<3xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I2]], %[[Iota]][%[[I2]]] : memref<3xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<3xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<3xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -44,13 +44,15 @@ // CHECK-LABEL: func @sparse_convert_1d_ss( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor @@ -59,28 +61,33 @@ // CHECK-COO-LABEL: func @sparse_convert( // CHECK-COO-SAME: %[[A:.*]]: !llvm.ptr) -// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 -// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 -// CHECK-COO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-COO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-COO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-COO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) -// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 +// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 +// CHECK-COO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-COO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-COO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK-COO: call @delSparseTensorCOOF32(%[[C]]) // CHECK-COO: return %[[T]] : !llvm.ptr +// // CHECK-AUTO-LABEL: func @sparse_convert( // CHECK-AUTO-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-AUTO-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32 -// CHECK-AUTO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-AUTO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-AUTO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-AUTO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) +// CHECK-AUTO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-AUTO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-AUTO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) // CHECK-AUTO: return %[[T]] : !llvm.ptr // CHECK-RWT-LABEL: func.func @sparse_convert( @@ -121,28 +128,33 @@ // CHECK-COO-LABEL: func @sparse_convert_singleton( // CHECK-COO-SAME: %[[A:.*]]: !llvm.ptr) -// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 -// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 -// CHECK-COO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-COO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-COO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-COO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) -// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 +// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 +// CHECK-COO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-COO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-COO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK-COO: call @delSparseTensorCOOF32(%[[C]]) // CHECK-COO: return %[[T]] : !llvm.ptr +// // CHECK-AUTO-LABEL: func @sparse_convert_singleton( // CHECK-AUTO-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-AUTO-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32 -// CHECK-AUTO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-AUTO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-AUTO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-AUTO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) +// CHECK-AUTO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-AUTO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-AUTO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) // CHECK-AUTO: return %[[T]] : !llvm.ptr func.func @sparse_convert_singleton(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -31,19 +31,21 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref // CHECK: %[[TMP_10:.*]] = memref.alloca() : memref @@ -84,20 +86,22 @@ // CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[TMP_c8_i8:.*]] = arith.constant 8 : i8 -// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_0:.*]] = memref.cast %[[LvlTypes_0]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_0:.*]] = memref.cast %[[DimSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c5]], %[[DimSizes_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_0:.*]] = memref.cast %[[LvlSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP_0:.*]] = memref.cast %[[Iota_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[NullPtr:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[IotaP_0]], %[[IotaP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[NullPtr]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref // CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref @@ -109,23 +113,25 @@ // CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 // CHECK: scf.if %[[TMP_23]] { // CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref -// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[IotaP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_1:.*]] = memref.cast %[[LvlTypes_1]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_1:.*]] = memref.cast %[[DimSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_1:.*]] = memref.cast %[[LvlSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP_1:.*]] = memref.cast %[[Iota_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[DimSizesP_1]], %[[LvlSizesP_1]], %[[LvlTypesP_1]], %[[IotaP_1]], %[[IotaP_1]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref // CHECK: %[[TMP_20:.*]] = memref.alloca() : memref @@ -138,11 +144,11 @@ // CHECK: %[[TMP_24:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex> // CHECK: memref.store %[[TMP_23]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> // CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[IotaP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: scf.yield // CHECK: } // CHECK: call @delSparseTensorIteratorF64(%[[TMP_17]]) : (!llvm.ptr) -> () -// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[IotaP_0]], %[[IotaP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) // CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () // CHECK: return %[[TMP_21]] : !llvm.ptr // CHECK: } @@ -168,20 +174,24 @@ // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[TMP_c8_i8:.*]] = arith.constant 8 : i8 -// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_0:.*]] = memref.cast %[[LvlTypes_0]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_0:.*]] = memref.cast %[[DimSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c5]], %[[DimSizes_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_0:.*]] = memref.cast %[[LvlSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: %[[Lvl2Dim_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Lvl2DimP_0:.*]] = memref.cast %[[Lvl2Dim_0]] : memref<2xindex> to memref +// CHECK-DAG: %[[Dim2Lvl_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Dim2LvlP_0:.*]] = memref.cast %[[Dim2Lvl_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Dim2Lvl_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Dim2Lvl_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[NullPtr:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[Lvl2DimP_0]], %[[Dim2LvlP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[NullPtr]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref // CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref @@ -193,23 +203,25 @@ // CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 // CHECK: scf.if %[[TMP_23]] { // CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref -// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[Dim2LvlP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_1:.*]] = memref.cast %[[LvlTypes_1]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_1:.*]] = memref.cast %[[DimSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_1:.*]] = memref.cast %[[LvlSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP_1:.*]] = memref.cast %[[Iota_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[DimSizesP_1]], %[[LvlSizesP_1]], %[[LvlTypesP_1]], %[[IotaP_1]], %[[IotaP_1]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref // CHECK: %[[TMP_20:.*]] = memref.alloca() : memref @@ -222,11 +234,11 @@ // CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index // CHECK: memref.store %[[TMP_22]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> // CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[Dim2LvlP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: scf.yield // CHECK: } // CHECK: call @delSparseTensorIteratorF64(%[[TMP_17]]) : (!llvm.ptr) -> () -// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[Lvl2DimP_0]], %[[Dim2LvlP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) // CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () // CHECK: return %[[TMP_21]] : !llvm.ptr // CHECK: } @@ -260,19 +272,21 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref // CHECK: %[[TMP_10:.*]] = memref.alloca() : memref @@ -321,19 +335,21 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_7:.*]] = memref.cast %[[TMP_6]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_6]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_6]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[TMP_3]], %[[TMP_5]], %[[TMP_7]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref // CHECK: %[[TMP_11:.*]] = memref.alloca() : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -47,9 +47,9 @@ // // CHECK-CONVERT-LABEL: func @kernel( // CHECK-CONVERT: %[[C:.*]] = arith.constant 0 : index -// CHECK-CONVERT: %{{.*}} = call @sparseDimSize +// CHECK-CONVERT: %{{.*}} = call @sparseLvlSize // CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor -// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]]) +// CHECK-CONVERT: %[[S:.*]] = call @sparseLvlSize(%[[N]], %[[C]]) // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -3,60 +3,64 @@ #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> // CHECK-LABEL: func.func @fill_zero_after_alloc( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-SAME: %[[VAL_1:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant false -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 100 : index -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 300 : index -// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 8 : i8 -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_11]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_11]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xi8> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_6]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_15]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_4]], %[[VAL_18]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-SAME: %[[Arg0:.*]]: !llvm.ptr, +// CHECK-SAME: %[[Arg1:.*]]: !llvm.ptr) -> !llvm.ptr { +// CHECK-DAG: %[[F0:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[False:.*]] = arith.constant false +// CHECK-DAG: %[[True:.*]] = arith.constant true +// CHECK-DAG: %[[I100:.*]] = arith.constant 100 : index +// CHECK-DAG: %[[I300:.*]] = arith.constant 300 : index +// CHECK-DAG: %[[CompressedDLT:.*]] = arith.constant 8 : i8 +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[CompressedDLT]], %[[LvlTypes]]{{\[}}%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[CompressedDLT]], %[[LvlTypes]]{{\[}}%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I100]], %[[DimSizes]]{{\[}}%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I300]], %[[DimSizes]]{{\[}}%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I100]], %[[LvlSizes]]{{\[}}%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I300]], %[[LvlSizes]]{{\[}}%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]]{{\[}}%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]]{{\[}}%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[NullPtr:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[C0]], %[[C0]], %[[C1]], %[[C0]], %[[NullPtr]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr // CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<300xf64> // CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<300xf64> to memref // CHECK: %[[VAL_22:.*]] = memref.alloc() : memref<300xi1> // CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<300xi1> to memref // CHECK: %[[VAL_24:.*]] = memref.alloc() : memref<300xindex> // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_20]] : memref<300xf64>) -// CHECK: linalg.fill ins(%[[VAL_7]] : i1) outs(%[[VAL_22]] : memref<300xi1>) -// CHECK: %[[VAL_26:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_27:.*]] = call @sparseIndices0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_28:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_29:.*]] = call @sparseIndices0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_30:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_31:.*]] = call @sparsePointers0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_32:.*]] = call @sparseIndices0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_33:.*]] = call @sparsePointers0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_34:.*]] = call @sparseIndices0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_35:.*]] = call @sparseValuesF64(%[[VAL_1]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_26]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_26]]{{\[}}%[[VAL_6]]] : memref -// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_36]] to %[[VAL_37]] step %[[VAL_6]] { +// CHECK: linalg.fill ins(%[[F0]] : f64) outs(%[[VAL_20]] : memref<300xf64>) +// CHECK: linalg.fill ins(%[[False]] : i1) outs(%[[VAL_22]] : memref<300xi1>) +// CHECK: %[[VAL_26:.*]] = call @sparsePointers0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_27:.*]] = call @sparseIndices0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_28:.*]] = call @sparsePointers0(%[[Arg0]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_29:.*]] = call @sparseIndices0(%[[Arg0]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_30:.*]] = call @sparseValuesF64(%[[Arg0]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_31:.*]] = call @sparsePointers0(%[[Arg1]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_32:.*]] = call @sparseIndices0(%[[Arg1]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_33:.*]] = call @sparsePointers0(%[[Arg1]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_34:.*]] = call @sparseIndices0(%[[Arg1]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_35:.*]] = call @sparseValuesF64(%[[Arg1]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_26]]{{\[}}%[[I0]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_26]]{{\[}}%[[I1]]] : memref +// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_36]] to %[[VAL_37]] step %[[I1]] { // CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_27]]{{\[}}%[[VAL_38]]] : memref // CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_28]]{{\[}}%[[VAL_38]]] : memref -// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_38]], %[[VAL_6]] : index +// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_38]], %[[I1]] : index // CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_28]]{{\[}}%[[VAL_41]]] : memref -// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_31]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_31]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_45:.*]]:3 = scf.while (%[[VAL_46:.*]] = %[[VAL_40]], %[[VAL_47:.*]] = %[[VAL_43]], %[[VAL_48:.*]] = %[[VAL_5]]) : (index, index, index) -> (index, index, index) { +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_31]]{{\[}}%[[I0]]] : memref +// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_31]]{{\[}}%[[I1]]] : memref +// CHECK: %[[VAL_45:.*]]:3 = scf.while (%[[VAL_46:.*]] = %[[VAL_40]], %[[VAL_47:.*]] = %[[VAL_43]], %[[VAL_48:.*]] = %[[I0]]) : (index, index, index) -> (index, index, index) { // CHECK: %[[VAL_49:.*]] = arith.cmpi ult, %[[VAL_46]], %[[VAL_42]] : index // CHECK: %[[VAL_50:.*]] = arith.cmpi ult, %[[VAL_47]], %[[VAL_44]] : index // CHECK: %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1 @@ -73,20 +77,20 @@ // CHECK: %[[VAL_62:.*]] = scf.if %[[VAL_61]] -> (index) { // CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_30]]{{\[}}%[[VAL_52]]] : memref // CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_53]]] : memref -// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_53]], %[[VAL_6]] : index +// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_53]], %[[I1]] : index // CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_65]]] : memref -// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_6]] iter_args(%[[VAL_69:.*]] = %[[VAL_54]]) -> (index) { +// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[I1]] iter_args(%[[VAL_69:.*]] = %[[VAL_54]]) -> (index) { // CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_68]]] : memref // CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_70]]] : memref<300xf64> // CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_35]]{{\[}}%[[VAL_68]]] : memref // CHECK: %[[VAL_73:.*]] = arith.mulf %[[VAL_63]], %[[VAL_72]] : f64 // CHECK: %[[VAL_74:.*]] = arith.addf %[[VAL_71]], %[[VAL_73]] : f64 // CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_22]]{{\[}}%[[VAL_70]]] : memref<300xi1> -// CHECK: %[[VAL_76:.*]] = arith.cmpi eq, %[[VAL_75]], %[[VAL_7]] : i1 +// CHECK: %[[VAL_76:.*]] = arith.cmpi eq, %[[VAL_75]], %[[False]] : i1 // CHECK: %[[VAL_77:.*]] = scf.if %[[VAL_76]] -> (index) { -// CHECK: memref.store %[[VAL_8]], %[[VAL_22]]{{\[}}%[[VAL_70]]] : memref<300xi1> +// CHECK: memref.store %[[True]], %[[VAL_22]]{{\[}}%[[VAL_70]]] : memref<300xi1> // CHECK: memref.store %[[VAL_70]], %[[VAL_24]]{{\[}}%[[VAL_69]]] : memref<300xindex> -// CHECK: %[[VAL_78:.*]] = arith.addi %[[VAL_69]], %[[VAL_6]] : index +// CHECK: %[[VAL_78:.*]] = arith.addi %[[VAL_69]], %[[I1]] : index // CHECK: scf.yield %[[VAL_78]] : index // CHECK: } else { // CHECK: scf.yield %[[VAL_69]] : index @@ -98,15 +102,15 @@ // CHECK: } else { // CHECK: scf.yield %[[VAL_54]] : index // CHECK: } -// CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_52]], %[[VAL_6]] : index +// CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_52]], %[[I1]] : index // CHECK: %[[VAL_82:.*]] = arith.select %[[VAL_59]], %[[VAL_81]], %[[VAL_52]] : index -// CHECK: %[[VAL_83:.*]] = arith.addi %[[VAL_53]], %[[VAL_6]] : index +// CHECK: %[[VAL_83:.*]] = arith.addi %[[VAL_53]], %[[I1]] : index // CHECK: %[[VAL_84:.*]] = arith.select %[[VAL_60]], %[[VAL_83]], %[[VAL_53]] : index // CHECK: scf.yield %[[VAL_82]], %[[VAL_84]], %[[VAL_85:.*]] : index, index, index // CHECK: } // CHECK: %[[VAL_86:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[VAL_87:.*]] = memref.cast %[[VAL_86]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_39]], %[[VAL_86]]{{\[}}%[[VAL_5]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_39]], %[[VAL_86]]{{\[}}%[[I0]]] : memref<2xindex> // CHECK: func.call @expInsertF64(%[[VAL_19]], %[[VAL_87]], %[[VAL_21]], %[[VAL_23]], %[[VAL_25]], %[[VAL_88:.*]]#2) : (!llvm.ptr, memref, memref, memref, memref, index) -> () // CHECK: } // CHECK: memref.dealloc %[[VAL_20]] : memref<300xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -54,9 +54,9 @@ // CHECK-MIR-DAG: %[[VAL_2:.*]] = arith.constant 2 : index // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK-MIR-DAG: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index -// CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_3]]) : (!llvm.ptr, index) -> index -// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_2]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-DAG: %[[VAL_5:.*]] = call @sparseLvlSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparseLvlSize(%[[VAL_0]], %[[VAL_3]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseLvlSize(%[[VAL_0]], %[[VAL_2]]) : (!llvm.ptr, index) -> index // CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF32(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK-MIR: %[[VAL_11:.*]] = tensor.extract %[[VAL_1]][] : tensor diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6835,13 +6835,16 @@ srcs = [ "lib/ExecutionEngine/SparseTensor/File.cpp", "lib/ExecutionEngine/SparseTensor/NNZ.cpp", + "lib/ExecutionEngine/SparseTensor/PermutationRef.cpp", "lib/ExecutionEngine/SparseTensor/Storage.cpp", ], hdrs = [ + "include/mlir/ExecutionEngine/SparseTensor/Attributes.h", "include/mlir/ExecutionEngine/SparseTensor/COO.h", "include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h", "include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h", "include/mlir/ExecutionEngine/SparseTensor/File.h", + "include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h", "include/mlir/ExecutionEngine/SparseTensor/Storage.h", ], includes = ["include"],