diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Attributes.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Attributes.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Attributes.h @@ -0,0 +1,75 @@ +//===- Attributes.h - C++ attributes for SparseTensorRuntime ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines various macros for using C++ attributes whenever +// they're supported by the compiler. These macros are the same as the +// versions in the LLVMSupport library, but we define our own versions +// in order to avoid introducing that dependency just for the sake of +// these macros. (If we ever do end up depending on LLVMSupport, then +// we should remove this header and use "llvm/Support/Compiler.h" instead.) +// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_ATTRIBUTES_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_ATTRIBUTES_H + +// A wrapper around `__has_cpp_attribute` for C++11 style attributes, +// which are defined by ISO C++ SD-6. +// +#if defined(__cplusplus) && defined(__has_cpp_attribute) +// NOTE: The __cplusplus requirement should be unnecessary, but guards +// against issues with GCC . +#define MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +#define MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +// A wrapper around `__has_attribute`, which is defined by GCC 5+ and Clang. +// GCC: +// Clang: +#ifdef __has_attribute +#define MLIR_SPARSETENSOR_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define MLIR_SPARSETENSOR_HAS_ATTRIBUTE(x) 0 +#endif + +// An attribute for non-owning classes (like `PermutationRef`) to enable +// lifetime warnings. +#if MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(gsl::Pointer) +#define MLIR_SPARSETENSOR_GSL_POINTER [[gsl::Pointer]] +#else +#define MLIR_SPARSETENSOR_GSL_POINTER +#endif + +// An attribute for functions which are "pure" in the following sense: +// * the result depends only on the function arguments +// * pointer arguments are allowed to be read from, but not written to +// * has no observable effects other than the return value +// +// This allows the compiler to avoid repeated function calls whenever +// it can determine that the arguments are the same and that memory has +// not changed. +// +// This macro is called `LLVM_READONLY` by LLVMSupport. This definition +// differs slightly from the LLVM version by using `gnu::pure` when +// available, like Abseil's `ABSL_ATTRIBUTE_PURE_FUNCTION`. 
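//
// For illustration: the patch applies this to side-effect-free queries such
// as the following declaration from PermutationRef.h (shown here only as a
// usage sketch):
//
//   MLIR_SPARSETENSOR_PURE bool isPermutation(uint64_t size,
//                                             const uint64_t *perm);
//
// which lets the compiler avoid re-evaluating the call when `size` and
// `perm` are unchanged and no intervening writes to memory have occurred.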
+#if MLIR_SPARSETENSOR_HAS_CPP_ATTRIBUTE(gnu::pure) +#define MLIR_SPARSETENSOR_PURE [[gnu::pure]] +#elif MLIR_SPARSETENSOR_HAS_ATTRIBUTE(pure) || defined(__GNUC__) +#define MLIR_SPARSETENSOR_PURE __attribute__((pure)) +#else +#define MLIR_SPARSETENSOR_PURE +#endif + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_ATTRIBUTES_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h @@ -17,6 +17,8 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_COO_H +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" + #include #include #include @@ -102,34 +104,40 @@ using difference_type = typename vector_type::difference_type; using size_type = typename vector_type::size_type; - SparseTensorCOO(const std::vector &dimSizes, uint64_t capacity) - : dimSizes(dimSizes), isSorted(true) { + /// Constructs a new coordinate-scheme sparse tensor with the given + /// sizes and initial storage capacity. + /// + /// Asserts: + /// * `dimSizes` has nonzero size. + /// * the elements of `dimSizes` are nonzero. + explicit SparseTensorCOO(const std::vector &dimSizes, + uint64_t capacity = 0) + : SparseTensorCOO(dimSizes.size(), dimSizes.data(), capacity) {} + + // TODO: make a class for capturing known-valid sizes (a la PermutationRef), + // so that `SparseTensorStorage::toCOO` can avoid redoing these assertions. + // Also so that we can enforce the asserts *before* copying into `dimSizes`. + // + /// Constructs a new coordinate-scheme sparse tensor with the given + /// sizes and initial storage capacity. + /// + /// Precondition: `dimSizes` must be valid for `dimRank`. + /// + /// Asserts: + /// * `dimRank` is nonzero. + /// * the elements of `dimSizes` are nonzero. + explicit SparseTensorCOO(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t capacity = 0) + : dimSizes(dimSizes, dimSizes + dimRank), isSorted(true) { + assert(dimRank > 0 && "Trivial shape is not supported"); + for (uint64_t d = 0; d < dimRank; ++d) + assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage"); if (capacity) { elements.reserve(capacity); - indices.reserve(capacity * getRank()); + indices.reserve(capacity * dimRank); } } - /// Factory method. Permutes the original dimensions according to - /// the given ordering and expects subsequent add() calls to honor - /// that same ordering for the given indices. The result is a - /// fully permuted coordinate scheme. - /// - /// Precondition: `dimSizes` and `perm` must be valid for `rank`. - /// - /// Asserts: the elements of `dimSizes` are non-zero. - static SparseTensorCOO *newSparseTensorCOO(uint64_t rank, - const uint64_t *dimSizes, - const uint64_t *perm, - uint64_t capacity = 0) { - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; ++r) { - assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = dimSizes[r]; - } - return new SparseTensorCOO(permsz, capacity); - } - /// Gets the rank of the tensor. 
uint64_t getRank() const { return dimSizes.size(); } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -26,6 +26,7 @@ #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" #include @@ -113,9 +114,6 @@ /// Closes the file. void closeFile(); - /// Attempts to read a line from the file. - char *readLine(); - /// Reads and parses the file's header. void readHeader(); @@ -174,28 +172,23 @@ /// returns the value of the element. Stores the coordinates of the element /// to the `indices` array. template - V readCOOElement(uint64_t rank, uint64_t *indices, - const uint64_t *perm = nullptr) { - assert(rank == getRank() && "Rank mismatch"); - char *linePtr = readLine(); - if (perm) - for (uint64_t r = 0; r < rank; ++r) { - // Parse the 1-based index. - uint64_t idx = strtoul(linePtr, &linePtr, 10); - // Store the 0-based index. - indices[perm[r]] = idx - 1; - } - else - for (uint64_t r = 0; r < rank; ++r) { - // Parse the 1-based index. - uint64_t idx = strtoul(linePtr, &linePtr, 10); - // Store the 0-based index. - indices[r] = idx - 1; - } + V readCOOElement(uint64_t rank, uint64_t *indices) { + char *linePtr = readCOOIndices(rank, indices); return detail::readCOOValue(&linePtr, isPattern()); } private: + /// Attempts to read a line from the file. Is private because there's + /// no reason for client code to call it. + void readLine(); + + /// Reads the next line of the input file and parses the coordinates + /// into the `indices` argument. Returns the position in the `line` + /// buffer where the element's value should be parsed from. This method + /// has been factored out from `readCOOElement` to minimize code bloat + /// for the generated library. + char *readCOOIndices(uint64_t rank, uint64_t *indices); + /// Reads the MME header of a general sparse matrix of type real. void readMMEHeader(); @@ -217,40 +210,71 @@ //===----------------------------------------------------------------------===// /// Reads a sparse tensor with the given filename into a memory-resident -/// sparse tensor in coordinate scheme. -template -inline SparseTensorCOO * -openSparseTensorCOO(const char *filename, uint64_t rank, const uint64_t *shape, - const uint64_t *perm, PrimaryType valTp) { +/// sparse tensor. +/// +/// Preconditions: +/// * `dimShape` and `dim2lvl` must be valid for `dimRank`. +/// * `lvlTypes` and `lvl2dim` must be valid for `lvlRank`. +/// * `dim2lvl` is the inverse of `lvl2dim`. +/// +/// Asserts: +/// * the file's actual value type can be read as `valTp`. +/// * the file's actual dimension-sizes match the expected `dimShape`. +/// * `dim2lvl` is a permutation, and therefore also `dimRank == lvlRank`. +// +// TODO: As currently written, this function uses `dim2lvl` in two +// places: first, to construct the level-sizes from the file's actual +// dimension-sizes; and second, to map the file's dimension-indices into +// level-indices. The latter can easily generalize to arbitrary mappings, +// however the former cannot. Thus, once we functionalize the mappings, +// this function will need both the sizes-to-sizes and indices-to-indices +// variants of the `dim2lvl` mapping. 
For the `lvl2dim` direction we only +// need the indices-to-indices variant, for handing off to `newFromCOO`. +template +inline SparseTensorStorage * +openSparseTensor(uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + const uint64_t *dim2lvl, const char *filename, + PrimaryType valTp) { + // Read the file's header and check the file's actual element type and + // dimension-sizes against the expected element type and dimension-shape. SparseTensorReader stfile(filename); stfile.openFile(); stfile.readHeader(); - // Check tensor element type against the value type in the input file. if (!stfile.canReadAs(valTp)) MLIR_SPARSETENSOR_FATAL( "Tensor element type %d not compatible with values in file %s\n", static_cast(valTp), filename); - stfile.assertMatchesShape(rank, shape); - // Prepare sparse tensor object with per-dimension sizes - // and the number of nonzeros as initial capacity. + stfile.assertMatchesShape(dimRank, dimShape); + const uint64_t *dimSizes = stfile.getDimSizes(); + // Construct the level-sizes from the file's dimension-sizes + // NOTE: This doesn't generalize to arbitrary mappings. See the todo above. + assert(dimRank == lvlRank && "Rank mismatch"); + detail::PermutationRef d2l(dimRank, dim2lvl); + std::vector lvlSizes = d2l.pushforward(dimRank, dimSizes); + // Prepare a COO object with the number of nonzeros as initial capacity. uint64_t nnz = stfile.getNNZ(); - auto *coo = SparseTensorCOO::newSparseTensorCOO(rank, stfile.getDimSizes(), - perm, nnz); + auto *lvlCOO = new SparseTensorCOO(lvlSizes, nnz); // Read all nonzero elements. - std::vector indices(rank); + std::vector dimInd(dimRank); + std::vector lvlInd(lvlRank); for (uint64_t k = 0; k < nnz; ++k) { - const V value = stfile.readCOOElement(rank, indices.data(), perm); + const V value = stfile.readCOOElement(dimRank, dimInd.data()); + d2l.pushforward(dimRank, dimInd.data(), lvlInd.data()); // TODO: - coo->add(indices, value); + lvlCOO->add(lvlInd, value); // We currently chose to deal with symmetric matrices by fully // constructing them. In the future, we may want to make symmetry // implicit for storage reasons. - if (stfile.isSymmetric() && indices[0] != indices[1]) - coo->add({indices[1], indices[0]}, value); + if (stfile.isSymmetric() && lvlInd[0] != lvlInd[1]) + lvlCOO->add({lvlInd[1], lvlInd[0]}, value); } - // Close the file and return tensor. + // Close the file, convert the COO to STS, and return. stfile.closeFile(); - return coo; + auto *tensor = SparseTensorStorage::newFromCOO( + dimRank, dimSizes, lvlRank, lvlTypes, lvl2dim, *lvlCOO); + delete lvlCOO; + return tensor; } /// Writes the sparse tensor to `filename` in extended FROSTT format. diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h @@ -0,0 +1,142 @@ +//===- PermutationRef.h - Permutation reference wrapper ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header is not part of the public API. It is placed in the +// includes directory only because that's required by the implementations +// of template-classes. 
+// +// This file is part of the lightweight runtime support library for sparse +// tensor manipulations. The functionality of the support library is meant +// to simplify benchmarking, testing, and debugging MLIR code operating on +// sparse tensors. However, the provided functionality is **not** part of +// core MLIR itself. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_PERMUTATIONREF_H +#define MLIR_EXECUTIONENGINE_SPARSETENSOR_PERMUTATIONREF_H + +#include "mlir/ExecutionEngine/SparseTensor/Attributes.h" +#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" + +#include +#include +#include + +namespace mlir { +namespace sparse_tensor { +namespace detail { + +/// Checks whether the `perm` array is a permutation of `[0 .. size)`. +MLIR_SPARSETENSOR_PURE bool isPermutation(uint64_t size, const uint64_t *perm); + +/// Wrapper around `isPermutation` to ensure consistent error messages. +inline void assertIsPermutation(uint64_t size, const uint64_t *perm) { +#ifndef NDEBUG + if (!isPermutation(size, perm)) + MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size); +#endif +} + +// TODO: To implement things like `inverse` and `compose` while preserving +// the knowledge that `isPermutation` is true, we'll need to also have +// an owning version of `PermutationRef`. (Though ideally we'll really +// want to defunctionalize those methods so that we can avoid intermediate +// arrays/copies and only materialize the data on request.) + +/// A non-owning class for capturing the knowledge that `isPermutation` +/// is true, to avoid needing to assert it repeatedly. +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] PermutationRef final { +public: + /// Asserts `isPermutation` and returns the witness to that being true. + // + // TODO: For now the assertive ctor is sufficient, but in principle + // we'll want a factory that can optionally construct the object + // (so callers can handle errors themselves). + explicit PermutationRef(uint64_t size, const uint64_t *perm) + : permSize(size), perm(perm) { + assertIsPermutation(size, perm); + } + + uint64_t size() const { return permSize; } + + const uint64_t *data() const { return perm; } + + const uint64_t &operator[](uint64_t i) const { + assert(i < permSize && "index is out of bounds"); + return perm[i]; + } + + /// Constructs a pushforward array of values. This method is the inverse + /// of `permute` in the sense that for all `p` and `xs` we have: + /// * `p.permute(p.pushforward(xs)) == xs` + /// * `p.pushforward(p.permute(xs)) == xs` + template + inline std::vector pushforward(const std::vector &values) const { + return pushforward(values.size(), values.data()); + } + + template + inline std::vector pushforward(uint64_t size, const T *values) const { + std::vector out(permSize); + pushforward(size, values, out.data()); + return out; + } + + // NOTE: This form of the method is required by `toMLIRSparseTensor`, + // so it can reuse the `out` buffer for each iteration of a loop. + template + inline void pushforward(uint64_t size, const T *values, T *out) const { + assert(size == permSize && "size mismatch"); + for (uint64_t i = 0; i < permSize; ++i) + out[perm[i]] = values[i]; + } + + // NOTE: this is only needed by `toMLIRSparseTensor`, which in + // turn only needs it as a vector to hand off to `newSparseTensor`. + // Otherwise we would want the result to be an owning-permutation, + // to retain the knowledge that `isPermutation` is true. 
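//
// A small worked example (illustrative only) of `pushforward` above and its
// inverse `permute` below: given
//
//   uint64_t perm[] = {1, 2, 0};
//   uint64_t values[] = {10, 20, 30};
//   detail::PermutationRef p(3, perm);
//   std::vector<uint64_t> fwd = p.pushforward(3, values); // {30, 10, 20}
//   std::vector<uint64_t> back = p.permute(fwd);          // {10, 20, 30}
//
// `pushforward` scatters via `out[perm[i]] = values[i]` whereas `permute`
// gathers via `out[i] = values[perm[i]]`, which is why the two round-trip
// as documented below.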
+ // + /// Constructs the inverse permutation. This is equivalent to calling + /// `pushforward` with `std::iota` for the values. + std::vector inverse() const; + + /// Constructs a permuted array of values. This method is the inverse + /// of `pushforward` in the sense that for all `p` and `xs` we have: + /// * `p.permute(p.pushforward(xs)) == xs` + /// * `p.pushforward(p.permute(xs)) == xs` + template + inline std::vector permute(const std::vector &values) const { + return permute(values.size(), values.data()); + } + + template + inline std::vector permute(uint64_t size, const T *values) const { + std::vector out(permSize); + permute(size, values, out.data()); + return out; + } + + template + inline void permute(uint64_t size, const T *values, T *out) const { + assert(size == permSize && "size mismatch"); + for (uint64_t i = 0; i < permSize; ++i) + out[i] = values[perm[i]]; + return out; + } + +private: + const uint64_t permSize; + const uint64_t *const perm; // non-owning pointer. +}; + +} // namespace detail +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_PERMUTATIONREF_H diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -35,6 +35,7 @@ #include "mlir/Dialect/SparseTensor/IR/Enums.h" #include "mlir/ExecutionEngine/Float16bits.h" +#include "mlir/ExecutionEngine/SparseTensor/Attributes.h" #include "mlir/ExecutionEngine/SparseTensor/COO.h" #include "mlir/ExecutionEngine/SparseTensor/CheckedMul.h" #include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" @@ -50,14 +51,22 @@ // These macros ensure consistent error messages, without risk of incuring // an additional method call to do so. -#define ASSERT_VALID_DIM(d) \ - assert(d < getRank() && "Dimension index is out of bounds"); -#define ASSERT_COMPRESSED_DIM(d) \ - assert(isCompressedDim(d) && "Dimension is not compressed"); -#define ASSERT_COMPRESSED_OR_SINGLETON_DIM(d) \ - assert((isCompressedDim(d) || isSingletonDim(d)) && \ - "Dimension is neither compressed nor singleton"); -#define ASSERT_DENSE_DIM(d) assert(isDenseDim(d) && "Dimension is not dense"); +#define ASSERT_VALID_LVL(l) \ + assert(l < getLvlRank() && "Level index is out of bounds"); +#define ASSERT_COMPRESSED_LVL(l) \ + assert(isCompressedLvl(l) && "Level is not compressed"); +#define ASSERT_COMPRESSED_OR_SINGLETON_LVL(l) \ + do { \ + const DimLevelType dlt = getLvlType(l); \ + (void)dlt; \ + assert((isCompressedDLT(dlt) || isSingletonDLT(dlt)) && \ + "Level is neither compressed nor singleton"); \ + } while (false) +// Because the `SparseTensorStorageBase` ctor uses `MLIR_SPARSETENSOR_FATAL` +// (rather than `assert`) when validating level-types, all the uses of +// `ASSERT_DENSE_DLT` are technically unnecessary. However, they are +// retained for the sake of future-proofing. +#define ASSERT_DENSE_DLT(dlt) assert(isDenseDLT(dlt) && "Level is not dense"); /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects @@ -65,6 +74,41 @@ /// we use function overloading to implement "partial" method /// specialization, which the C-API relies on to catch type errors /// arising from our use of opaque pointers. 
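///
/// For example, the value-type getters are exposed as one overload per
/// supported value type (see `DECL_GETVALUES` below), conceptually:
///
///   virtual void getValues(std::vector<double> **);
///   virtual void getValues(std::vector<float> **);
///   // ... one overload per `V` in MLIR_SPARSETENSOR_FOREVERY_V ...
///
/// and a concrete `SparseTensorStorage<P, I, V>` overrides only the
/// overload whose element type matches its own `V`, so passing an opaque
/// pointer of the wrong value type is caught rather than silently
/// reinterpreted.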
+/// +/// Because this class forms a bridge between the denotational semantics +/// of "tensors" and the operational semantics of how we store and +/// compute with them, it also distinguishes between two different +/// coordinate spaces (and their associated rank, shape, sizes, etc). +/// Denotationally, we have the *dimensions* of the tensor represented +/// by this object. Operationally, we have the *levels* of the storage +/// representation itself. We use this "dimension" vs "level" terminology +/// throughout, since alternative terminology like "tensor-dimension", +/// "original-dimension", "storage-dimension", etc, is both more verbose +/// and prone to introduce confusion whenever the qualifiers are dropped. +/// Where necessary, we use "axis" as the generic term. +/// +/// The *size* of an axis is the cardinality of possible coordinate/index +/// values along that axis (regardless of which coordinates have stored +/// element values). As such, each size must be non-zero since if any +/// axis has size-zero then the whole tensor would have trivial storage +/// (since there are no possible coordinates). Thus we use the plural +/// term *sizes* for a collection of non-zero cardinalities, and use +/// this term whenever referring to run-time cardinalities. Whereas we +/// use the term *shape* for a collection of compile-time cardinalities, +/// where zero is used to indicate cardinalities which are dynamic (i.e., +/// unknown/unspecified at compile-time). At run-time, these dynamic +/// cardinalities will be inferred from or checked against sizes otherwise +/// specified. Thus, dynamic cardinalities always have an "immutable but +/// unknown" value; so the term "dynamic" should not be taken to indicate +/// run-time mutability. +// +// TODO: we'd like to factor out a class akin to `PermutationRef` for +// capturing known-valid sizes to avoid redundant validity assertions. +// But calling that class "SizesRef" would be a terrible name (and +// "ValidSizesRef" isn't much better). Whereas, calling it "ShapeRef" +// would be a lot nicer, but then that conflicts with the terminology +// introduced above. So we need to come up with some new terminology +// for distinguishing things, which allows a reasonable class name too. class SparseTensorStorageBase { protected: // Since this class is virtual, we must disallow public copying in @@ -72,69 +116,92 @@ // that means making copying protected. // SparseTensorStorageBase(const SparseTensorStorageBase &) = default; - // Copy-assignment would be implicitly deleted (because `dimSizes` - // is const), so we explicitly delete it for clarity. + // Copy-assignment would be implicitly deleted (because our fields + // are const), so we explicitly delete it for clarity. SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete; public: - /// Constructs a new storage object. The `perm` maps the tensor's - /// semantic-ordering of dimensions to this object's storage-order. - /// The `dimSizes` and `sparsity` arrays are already in storage-order. + /// Constructs a new sparse-tensor storage object with the given encoding. /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorageBase(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity); + /// Preconditions: + /// * `dimSizes`, `lvlSizes`, `lvlTypes`, and `lvl2dim` must be nonnull. + /// * `dimSizes` must be valid for `dimRank`. + /// * `lvlSizes`, `lvlTypes`, and `lvl2dim` must be valid for `lvlRank`. 
+ /// * `lvl2dim` must map indices valid for `lvlSizes` to indices valid + /// for `dimSizes`. + /// + /// Asserts: + /// * `dimRank` and `lvlRank` are nonzero. + /// * `dimSizes` and `lvlSizes` contain only nonzero sizes. + SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, + const uint64_t *lvl2dim); + // NOTE: For the most part we only need the `dimRank`. But we need + // `dimSizes` for `toCOO` to support the identity permutation nicely + // (i.e., without the caller needing to already know the tensor's + // dimension-sizes; e.g., as in `fromMLIRSparseTensor`). virtual ~SparseTensorStorageBase() = default; - /// Gets the rank of the tensor. - uint64_t getRank() const { return dimSizes.size(); } + /// Gets the number of tensor-dimensions. + uint64_t getDimRank() const { return dimSizes.size(); } - /// Gets the dimension-sizes array, in storage-order. + /// Gets the number of storage-levels. + uint64_t getLvlRank() const { return lvlSizes.size(); } + + /// Gets the tensor-dimension sizes array. const std::vector &getDimSizes() const { return dimSizes; } - /// Safely looks up the size of the given (storage-order) dimension. - uint64_t getDimSize(uint64_t d) const { - ASSERT_VALID_DIM(d); - return dimSizes[d]; + /// Gets the storage-level sizes array. + const std::vector &getLvlSizes() const { return lvlSizes; } + + /// Safely looks up the size of the given storage-level. + uint64_t getLvlSize(uint64_t l) const { + ASSERT_VALID_LVL(l); + return lvlSizes[l]; } - /// Gets the "reverse" permutation, which maps this object's - /// storage-order to the tensor's semantic-order. - const std::vector &getRev() const { return rev; } + /// Gets the level-to-dimension mapping. + const std::vector &getLvl2Dim() const { return lvl2dim; } - /// Gets the dimension-types array, in storage-order. - const std::vector &getDimTypes() const { return dimTypes; } + /// Gets the level-types array. + const std::vector &getLvlTypes() const { return lvlTypes; } - /// Safely looks up the level-type of the given (storage-order) dimension. - DimLevelType getDimType(uint64_t d) const { - ASSERT_VALID_DIM(d); - return dimTypes[d]; + /// Safely looks up the type of the given level. + DimLevelType getLvlType(uint64_t l) const { + ASSERT_VALID_LVL(l); + return lvlTypes[l]; } - /// Safely checks if the (storage-order) dimension uses dense storage. - bool isDenseDim(uint64_t d) const { return isDenseDLT(getDimType(d)); } + /// Safely checks if the level uses dense storage. + bool isDenseLvl(uint64_t l) const { return isDenseDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension uses compressed storage. - bool isCompressedDim(uint64_t d) const { - return isCompressedDLT(getDimType(d)); + /// Safely checks if the level uses compressed storage. + bool isCompressedLvl(uint64_t l) const { + return isCompressedDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension uses singleton storage. - bool isSingletonDim(uint64_t d) const { - return isSingletonDLT(getDimType(d)); + /// Safely checks if the level uses singleton storage. + bool isSingletonLvl(uint64_t l) const { + return isSingletonDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension is ordered. - bool isOrderedDim(uint64_t d) const { return isOrderedDLT(getDimType(d)); } + /// Safely checks if the level is ordered. 
+ bool isOrderedLvl(uint64_t l) const { return isOrderedDLT(getLvlType(l)); } - /// Safely checks if the (storage-order) dimension is unique. - bool isUniqueDim(uint64_t d) const { return isUniqueDLT(getDimType(d)); } + /// Safely checks if the level is unique. + bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); } - /// Allocates a new enumerator. + /// Allocates a new enumerator. Callers must make sure to delete + /// the enumerator when they're done with it. The first argument + /// is the out-parameter for storing the newly allocated enumerator; + /// all other arguments are passed along to the `SparseTensorEnumerator` + /// ctor and must satisfy the preconditions/assertions thereof. #define DECL_NEWENUMERATOR(VNAME, V) \ virtual void newEnumerator(SparseTensorEnumeratorBase **, uint64_t, \ - const uint64_t *) const; + const uint64_t *, uint64_t, const uint64_t *) \ + const; MLIR_SPARSETENSOR_FOREVERY_V(DECL_NEWENUMERATOR) #undef DECL_NEWENUMERATOR @@ -149,19 +216,33 @@ virtual void getIndices(std::vector **, uint64_t); MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETINDICES) #undef DECL_GETINDICES - virtual uint64_t getIndex(uint64_t d, uint64_t pos) const = 0; + virtual uint64_t getIndex(uint64_t l, uint64_t pos) const = 0; /// Gets primary storage. #define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector **); MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES) #undef DECL_GETVALUES - /// Element-wise insertion in lexicographic index order. + /// Element-wise insertion in lexicographic index order. The first + /// argument is the level-indices for the value being inserted. + // TODO: For better safety, this should take a parameter for the + // length of `lvlInd` and check that against `getLvlRank()`. #define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V); MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT) #undef DECL_LEXINSERT - /// Expanded insertion. + /// Expanded insertion. Note that this method resets the + /// values/filled-switch array back to all-zero/false while only + /// iterating over the nonzero elements. + /// + /// Arguments: + /// * `lvlInd` the level-indices shared by the values being inserted. + /// * `values` a map from last-level coordinates to their associated value. + /// * `filled` a map from last-level coordinates to bool, indicating + /// whether `values` contains a valid value to be inserted. + /// * `added` a map from `[0..count)` to last-level coordinates for + /// which `filled` is true and `values` contains the assotiated value. + /// * `count` the size of `added`. #define DECL_EXPINSERT(VNAME, V) \ virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t); MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT) @@ -172,8 +253,9 @@ private: const std::vector dimSizes; - std::vector rev; // conceptually `const` - const std::vector dimTypes; + const std::vector lvlSizes; + const std::vector lvlTypes; + const std::vector lvl2dim; }; //===----------------------------------------------------------------------===// @@ -193,107 +275,179 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Private constructor to share code between the other constructors. /// Beware that the object is not necessarily guaranteed to be in a - /// valid state after this constructor alone; e.g., `isCompressedDim(d)` - /// doesn't entail `!(pointers[d].empty())`. + /// valid state after this constructor alone; e.g., `isCompressedLvl(l)` + /// doesn't entail `!(pointers[l].empty())`. 
/// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity) - : SparseTensorStorageBase(dimSizes, perm, sparsity), pointers(getRank()), - indices(getRank()), idx(getRank()) {} + /// Preconditions/assertions are as per the `SparseTensorStorageBase` ctor. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim) + : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, + lvl2dim), + pointers(lvlRank), indices(lvlRank), lvlCursor(lvlRank) {} public: - /// Constructs a sparse tensor storage scheme with the given dimensions, - /// permutation, and per-dimension dense/sparse annotations, using - /// the coordinate scheme tensor for the initial contents if provided. + /// Constructs a sparse tensor with the given encoding, and allocates + /// overhead storage according to some simple heuristics. When the + /// `bool` argument is true and `lvlTypes` are all dense, then this + /// ctor will also initialize the values array with zeros. That + /// argument should be true when an empty tensor is intended; whereas + /// it should usually be false when the ctor will be followed up by + /// some other form of initialization. /// - /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity, - SparseTensorCOO *coo); + /// Preconditions/assertions are as per the `SparseTensorStorageBase` ctor. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + bool initializeValuesIfAllDense); - /// Constructs a sparse tensor storage scheme with the given dimensions, - /// permutation, and per-dimension dense/sparse annotations, using - /// the given sparse tensor for the initial contents. + /// Constructs a sparse tensor with the given encoding, and initializes + /// the contents from the COO. This ctor performs the same heuristic + /// overhead-storage allocation as the ctor taking a `bool`, and + /// has the same preconditions/assertions (where we define `lvlSizes = + /// lvlCOO.getDimSizes().data()`), with the following addition: /// - /// Preconditions: - /// * `perm` and `sparsity` must be valid for `dimSizes.size()`. - /// * The `tensor` must have the same value type `V`. - SparseTensorStorage(const std::vector &dimSizes, - const uint64_t *perm, const DimLevelType *sparsity, - const SparseTensorStorageBase &tensor); + /// Asserts: + /// * `lvlRank == lvlCOO.getRank()`. + SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, SparseTensorCOO &lvlCOO); + + /// Constructs a sparse tensor with the given encoding, and initializes + /// the contents from the enumerator. This ctor allocates exactly + /// the required amount of overhead storage, not using any heuristics. + /// Preconditions/assertions are as per the `SparseTensorStorageBase` + /// ctor (where we define `lvlSizes = lvlEnumerator.getTrgSizes().data()`), + /// with the following addition: + /// + /// Asserts: + /// * `lvlRank == lvlEnumerator.getTrgRank()`. 
+ SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes, + uint64_t lvlRank, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, + SparseTensorEnumeratorBase &lvlEnumerator); - /// Factory method. Constructs a sparse tensor storage scheme with the given - /// dimensions, permutation, and per-dimension dense/sparse annotations, - /// using the coordinate scheme tensor for the initial contents if provided. - /// In the latter case, the coordinate scheme must respect the same - /// permutation as is desired for the new sparse tensor storage. + /// Allocates a new empty sparse tensor. The preconditions/assertions + /// are as per the `SparseTensorStorageBase` ctor; which is to say, + /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes", + /// since there's nowhere to reconstruct dynamic sizes from. + static SparseTensorStorage * + newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim) { + return new SparseTensorStorage(dimRank, dimSizes, lvlRank, + lvlSizes, lvlTypes, lvl2dim, true); + } + + /// Allocates a new sparse tensor and initializes it from the given COO. + /// The preconditions are as per the `SparseTensorStorageBase` ctor + /// (where we define `lvlSizes = lvlCOO.getDimSizes().data()`), but + /// using the following assertions in lieu of the base ctor's assertions: /// - /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`. + /// Asserts: + /// * `dimRank` and `lvlRank` are nonzero. + /// * `lvlRank == lvlCOO.getRank()`. + /// * `lvlCOO.getDimSizes()` under the `lvl2dim` mapping is a refinement + /// of `dimShape`. + // + // TODO: The ability to reconstruct dynamic dimensions-sizes does not + // easily generalize to arbitrary `lvl2dim` mappings. When compiling + // MLIR programs to use this library, we should be able to generate + // code for effectively computing the reconstruction, but it's not clear + // that there's a feasible way to do so from within the library itself. + // Therefore, when we functionalize the `lvl2dim` mapping we'll have + // to update the type/preconditions of this factory too. static SparseTensorStorage * - newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo); + newFromCOO(uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO); - /// Factory method. Constructs a sparse tensor storage scheme with - /// the given dimensions, permutation, and per-dimension dense/sparse - /// annotations, using the sparse tensor for the initial contents. + /// Allocates a new sparse tensor and initializes it with the contents + /// of another sparse tensor. /// /// Preconditions: - /// * `shape`, `perm`, and `sparsity` must be valid for `rank`. - /// * The `tensor` must have the same value type `V`. + /// * as per the `SparseTensorStorageBase` ctor. + /// * `src2lvl` must be valid for `srcRank`, must map indices valid for + /// `source.getDimSizes()` to indices valid for `lvlSizes`, and therefore + /// must be the inverse of `lvl2dim`. + /// * `source` must have the same value type `V`. + /// + /// Asserts: + /// * `dimRank` and `lvlRank` are nonzero. + /// * `srcRank == source.getDimRank()`. + /// * `lvlSizes` contains only nonzero sizes. + /// * `source.getDimSizes()` is a refinement of `dimShape`. 
+ // + // TODO: The `dimRank` and `dimShape` arguments are only used for + // verifying that the source tensor has the expected shape. So if we + // wanted to skip that verification, then we could remove those arguments. + // Alternatively, if we required the `dimShape` to be "sizes" instead, + // then that would remove any constraints on `source.getDimSizes()` + // (other than compatibility with `src2lvl`) as well as removing the + // requirement that `src2lvl` be the inverse of `lvl2dim`. Which would + // enable this factory to be used for performing a much larger class of + // transformations (which can already be handled by the `SparseTensorNNZ` + // implementation). static SparseTensorStorage * - newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, - const SparseTensorStorageBase *source); + newFromSparseTensor(uint64_t dimRank, const uint64_t *dimShape, + uint64_t lvlRank, const uint64_t *lvlSizes, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + uint64_t srcRank, const uint64_t *src2lvl, + const SparseTensorStorageBase &source); ~SparseTensorStorage() final = default; /// Partially specialize these getter methods based on template types. - void getPointers(std::vector
<P>
**out, uint64_t d) final { - ASSERT_VALID_DIM(d); - *out = &pointers[d]; + void getPointers(std::vector
<P>
**out, uint64_t l) final { + assert(out && "Received nullptr for out parameter"); + ASSERT_VALID_LVL(l); + *out = &pointers[l]; } - void getIndices(std::vector **out, uint64_t d) final { - ASSERT_VALID_DIM(d); - *out = &indices[d]; + void getIndices(std::vector **out, uint64_t l) final { + assert(out && "Received nullptr for out parameter"); + ASSERT_VALID_LVL(l); + *out = &indices[l]; + } + void getValues(std::vector **out) final { + assert(out && "Received nullptr for out parameter"); + *out = &values; } - void getValues(std::vector **out) final { *out = &values; } - uint64_t getIndex(uint64_t d, uint64_t pos) const final { - ASSERT_COMPRESSED_OR_SINGLETON_DIM(d); - assert(pos < indices[d].size() && "Index position is out of bounds"); - return indices[d][pos]; // Converts the stored `I` into `uint64_t`. + uint64_t getIndex(uint64_t l, uint64_t pos) const final { + ASSERT_COMPRESSED_OR_SINGLETON_LVL(l); + assert(pos < indices[l].size() && "Index position is out of bounds"); + return indices[l][pos]; // Converts the stored `I` into `uint64_t`. } /// Partially specialize lexicographical insertions based on template types. - void lexInsert(const uint64_t *cursor, V val) final { + void lexInsert(const uint64_t *lvlInd, V val) final { + assert(lvlInd && "Received nullptr for level-indices"); // First, wrap up pending insertion path. - uint64_t diff = 0; - uint64_t top = 0; + uint64_t diffLvl = 0; + uint64_t topIdx = 0; if (!values.empty()) { - diff = lexDiff(cursor); - endPath(diff + 1); - top = idx[diff] + 1; + diffLvl = lexDiff(lvlInd); + endPath(diffLvl + 1); + topIdx = lvlCursor[diffLvl] + 1; } // Then continue with insertion path. - insPath(cursor, diff, top, val); + insPath(lvlInd, diffLvl, topIdx, val); } /// Partially specialize expanded insertions based on template types. - /// Note that this method resets the values/filled-switch array back - /// to all-zero/false while only iterating over the nonzero elements. - void expInsert(uint64_t *cursor, V *values, bool *filled, uint64_t *added, + void expInsert(uint64_t *lvlInd, V *values, bool *filled, uint64_t *added, uint64_t count) final { + assert((lvlInd && values && filled && added) && "Received nullptr"); if (count == 0) return; // Sort. std::sort(added, added + count); // Restore insertion path for first insert. - const uint64_t lastDim = getRank() - 1; + const uint64_t lastLvl = getLvlRank() - 1; uint64_t index = added[0]; assert(filled[index] && "added index is not filled"); - cursor[lastDim] = index; - lexInsert(cursor, values[index]); + lvlInd[lastLvl] = index; + lexInsert(lvlInd, values[index]); values[index] = 0; filled[index] = false; // Subsequent insertions are quick. @@ -301,8 +455,8 @@ assert(index < added[i] && "non-lexicographic insertion"); index = added[i]; assert(filled[index] && "added index is not filled"); - cursor[lastDim] = index; - insPath(cursor, lastDim, added[i - 1] + 1, values[index]); + lvlInd[lastLvl] = index; + insPath(lvlInd, lastLvl, added[i - 1] + 1, values[index]); values[index] = 0; filled[index] = false; } @@ -316,103 +470,110 @@ endPath(0); } - /// Allocates a new enumerator for this classes `` types and + /// Allocates a new enumerator for this class's `` types and /// erase the `` parts from the type. Callers must make sure to /// delete the enumerator when they're done with it. 
- void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t rank, - const uint64_t *perm) const final { - *out = new SparseTensorEnumerator(*this, rank, perm); + void newEnumerator(SparseTensorEnumeratorBase **out, uint64_t trgRank, + const uint64_t *trgSizes, uint64_t srcRank, + const uint64_t *src2trg) const final { + assert(out && "Received nullptr for out parameter"); + *out = new SparseTensorEnumerator(*this, trgRank, trgSizes, + srcRank, src2trg); } - /// Returns this sparse tensor storage scheme as a new memory-resident - /// sparse tensor in coordinate scheme with the given dimension order. + /// Allocates a new COO object and initializes it with the contents + /// of this tensor under the given mapping from the `getDimSizes()` + /// coordinate-space to the `trgSizes` coordinate-space. Callers must + /// make sure to delete the COO when they're done with it. /// - /// Precondition: `perm` must be valid for `getRank()`. - SparseTensorCOO *toCOO(const uint64_t *perm) const { - SparseTensorEnumeratorBase *enumerator; - newEnumerator(&enumerator, getRank(), perm); - SparseTensorCOO *coo = - new SparseTensorCOO(enumerator->permutedSizes(), values.size()); - enumerator->forallElements([&coo](const std::vector &ind, V val) { - coo->add(ind, val); - }); + /// Preconditions/assertions are as per the `SparseTensorEnumerator` ctor. + SparseTensorCOO *toCOO(uint64_t trgRank, const uint64_t *trgSizes, + uint64_t srcRank, const uint64_t *src2trg) const { + // We inline `newEnumerator` to avoid virtual dispatch and allocation. + SparseTensorEnumerator enumerator(*this, trgRank, trgSizes, + srcRank, src2trg); + auto *coo = new SparseTensorCOO(trgRank, trgSizes, values.size()); + enumerator.forallElements( + [&coo](const auto &trgInd, V val) { coo->add(trgInd, val); }); // TODO: This assertion assumes there are no stored zeros, // or if there are then that we don't filter them out. // Cf., assert(coo->getElements().size() == values.size()); - delete enumerator; return coo; } private: - /// Appends an arbitrary new position to `pointers[d]`. This method + /// Appends an arbitrary new position to `pointers[l]`. This method /// checks that `pos` is representable in the `P` type; however, it /// does not check that `pos` is semantically valid (i.e., larger than - /// the previous position and smaller than `indices[d].capacity()`). - void appendPointer(uint64_t d, uint64_t pos, uint64_t count = 1) { - ASSERT_COMPRESSED_DIM(d); + /// the previous position and smaller than `indices[l].capacity()`). + void appendPointer(uint64_t l, uint64_t pos, uint64_t count = 1) { + ASSERT_COMPRESSED_LVL(l); assert(pos <= std::numeric_limits
<P>
::max() && "Pointer value is too large for the P-type"); - pointers[d].insert(pointers[d].end(), count, static_cast
<P>
(pos)); + pointers[l].insert(pointers[l].end(), count, static_cast
<P>
(pos)); } - /// Appends index `i` to dimension `d`, in the semantically general - /// sense. For non-dense dimensions, that means appending to the - /// `indices[d]` array, checking that `i` is representable in the `I` - /// type; however, we do not verify other semantic requirements (e.g., - /// that `i` is in bounds for `dimSizes[d]`, and not previously occurring - /// in the same segment). For dense dimensions, this method instead - /// appends the appropriate number of zeros to the `values` array, - /// where `full` is the number of "entries" already written to `values` - /// for this segment (aka one after the highest index previously appended). - void appendIndex(uint64_t d, uint64_t full, uint64_t i) { - if (isCompressedDim(d) || isSingletonDim(d)) { + /// Appends index `i` to level `l`, in the semantically general sense. + /// For non-dense levels, that means appending to the `indices[l]` array, + /// checking that `i` is representable in the `I` type; however, we do + /// not verify other semantic requirements (e.g., that `i` is in bounds + /// for `lvlSizes[l]`, and not previously occurring in the same segment). + /// For dense levels, this method instead appends the appropriate number + /// of zeros to the `values` array, where `full` is the number of "entries" + /// already written to `values` for this segment (aka one after the highest + /// index previously appended). + void appendIndex(uint64_t l, uint64_t full, uint64_t i) { + const auto dlt = getLvlType(l); // Avoid redundant bounds checking. + if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) { assert(i <= std::numeric_limits::max() && "Index value is too large for the I-type"); - indices[d].push_back(static_cast(i)); + indices[l].push_back(static_cast(i)); } else { // Dense dimension. - ASSERT_DENSE_DIM(d); + ASSERT_DENSE_DLT(dlt); assert(i >= full && "Index was already filled"); if (i == full) return; // Short-circuit, since it'll be a nop. - if (d + 1 == getRank()) + if (l + 1 == getLvlRank()) values.insert(values.end(), i - full, 0); else - finalizeSegment(d + 1, 0, i - full); + finalizeSegment(l + 1, 0, i - full); } } - /// Writes the given coordinate to `indices[d][pos]`. This method + /// Writes the given coordinate to `indices[l][pos]`. This method /// checks that `i` is representable in the `I` type; however, it /// does not check that `i` is semantically valid (i.e., in bounds - /// for `dimSizes[d]` and not elsewhere occurring in the same segment). - void writeIndex(uint64_t d, uint64_t pos, uint64_t i) { - ASSERT_COMPRESSED_OR_SINGLETON_DIM(d); + /// for `dimSizes[l]` and not elsewhere occurring in the same segment). + void writeIndex(uint64_t l, uint64_t pos, uint64_t i) { + ASSERT_COMPRESSED_OR_SINGLETON_LVL(l); // Subscript assignment to `std::vector` requires that the `pos`-th // entry has been initialized; thus we must be sure to check `size()` // here, instead of `capacity()` as would be ideal. - assert(pos < indices[d].size() && "Index position is out of bounds"); + assert(pos < indices[l].size() && "Index position is out of bounds"); assert(i <= std::numeric_limits::max() && "Index value is too large for the I-type"); - indices[d][pos] = static_cast(i); + indices[l][pos] = static_cast(i); } - /// Computes the assembled-size associated with the `d`-th dimension, - /// given the assembled-size associated with the `(d-1)`-th dimension. + /// Computes the assembled-size associated with the `l`-th level, + /// given the assembled-size associated with the `(l-1)`-th level. 
/// "Assembled-sizes" correspond to the (nominal) sizes of overhead - /// storage, as opposed to "dimension-sizes" which are the cardinality - /// of coordinates for that dimension. + /// storage, as opposed to "level-sizes" which are the cardinality + /// of possible coordinates for that level. /// - /// Precondition: the `pointers[d]` array must be fully initialized + /// Precondition: the `pointers[l]` array must be fully initialized /// before calling this method. - uint64_t assembledSize(uint64_t parentSz, uint64_t d) const { - if (isCompressedDim(d)) - return pointers[d][parentSz]; - if (isSingletonDim(d)) + uint64_t assembledSize(uint64_t parentSz, uint64_t l) const { + const auto dlt = getLvlType(l); // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) + return pointers[l][parentSz]; + if (isSingletonDLT(dlt)) return parentSz; // New size is same as the parent. - if (isDenseDim(d)) - return parentSz * getDimSizes()[d]; - MLIR_SPARSETENSOR_FATAL("unsupported dimension level type"); + if (isDenseDLT(dlt)) + return parentSz * getLvlSizes()[l]; + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", + static_cast(dlt)); } /// Initializes sparse tensor storage scheme from a memory-resident sparse @@ -420,95 +581,101 @@ /// indices arrays under the given per-dimension dense/sparse annotations. /// /// Preconditions: - /// (1) the `elements` must be lexicographically sorted. - /// (2) the indices of every element are valid for `dimSizes` (equal rank - /// and pointwise less-than). - void fromCOO(const std::vector> &elements, uint64_t lo, - uint64_t hi, uint64_t d) { - const uint64_t rank = getRank(); - assert(d <= rank && hi <= elements.size()); + /// * the `lvlElements` must be lexicographically sorted. + /// * the indices of every element are valid for `getLvlSizes()` + /// (i.e., equal rank and pointwise less-than). + void fromCOO(const std::vector> &lvlElements, uint64_t lo, + uint64_t hi, uint64_t l) { + const uint64_t lvlRank = getLvlRank(); + assert(l <= lvlRank && hi <= lvlElements.size()); // Once dimensions are exhausted, insert the numerical values. - if (d == rank) { + if (l == lvlRank) { assert(lo < hi); - values.push_back(elements[lo].value); + values.push_back(lvlElements[lo].value); return; } // Visit all elements in this interval. uint64_t full = 0; - while (lo < hi) { // If `hi` is unchanged, then `lo < elements.size()`. - // Find segment in interval with same index elements in this dimension. - const uint64_t i = elements[lo].indices[d]; + while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`. + // Find segment in interval with same index elements in this level. + const uint64_t i = lvlElements[lo].indices[l]; uint64_t seg = lo + 1; - if (isUniqueDim(d)) - while (seg < hi && elements[seg].indices[d] == i) + if (isUniqueLvl(l)) + while (seg < hi && lvlElements[seg].indices[l] == i) ++seg; - // Handle segment in interval for sparse or dense dimension. - appendIndex(d, full, i); + // Handle segment in interval for sparse or dense level. + appendIndex(l, full, i); full = i + 1; - fromCOO(elements, lo, seg, d + 1); + fromCOO(lvlElements, lo, seg, l + 1); // And move on to next segment in interval. lo = seg; } - // Finalize the sparse pointer structure at this dimension. - finalizeSegment(d, full); + // Finalize the sparse pointer structure at this level. + finalizeSegment(l, full); } - /// Finalizes the sparse pointer structure at this dimension. 
- void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) { + /// Finalizes the sparse pointer structure at this level. + void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) { if (count == 0) return; // Short-circuit, since it'll be a nop. - if (isCompressedDim(d)) { - appendPointer(d, indices[d].size(), count); - } else if (isSingletonDim(d)) { + const auto dlt = getLvlType(l); // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + appendPointer(l, indices[l].size(), count); + } else if (isSingletonDLT(dlt)) { return; // Nothing to finalize. - } else { // Dense dimension. - ASSERT_DENSE_DIM(d); - const uint64_t sz = getDimSizes()[d]; + } else { // Dense dimension. + ASSERT_DENSE_DLT(dlt); + const uint64_t sz = getLvlSizes()[l]; assert(sz >= full && "Segment is overfull"); count = detail::checkedMul(count, sz - full); // For dense storage we must enumerate all the remaining coordinates - // in this dimension (i.e., coordinates after the last non-zero + // in this level (i.e., coordinates after the last non-zero // element), and either fill in their zero values or else recurse - // to finalize some deeper dimension. - if (d + 1 == getRank()) + // to finalize some deeper level. + if (l + 1 == getLvlRank()) values.insert(values.end(), count, 0); else - finalizeSegment(d + 1, 0, count); + finalizeSegment(l + 1, 0, count); } } /// Wraps up a single insertion path, inner to outer. - void endPath(uint64_t diff) { - const uint64_t rank = getRank(); - assert(diff <= rank && "Dimension-diff is out of bounds"); - for (uint64_t i = 0; i < rank - diff; ++i) { - const uint64_t d = rank - i - 1; - finalizeSegment(d, idx[d] + 1); + void endPath(uint64_t diffLvl) { + const uint64_t lvlRank = getLvlRank(); + const uint64_t lastLvl = lvlRank - 1; + assert(diffLvl <= lvlRank && "Level-diff is out of bounds"); + const uint64_t stop = lvlRank - diffLvl; + for (uint64_t i = 0; i < stop; ++i) { + const uint64_t l = lastLvl - i; + finalizeSegment(l, lvlCursor[l] + 1); } } - /// Continues a single insertion path, outer to inner. - void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) { - const uint64_t rank = getRank(); - assert(diff <= rank && "Dimension-diff is out of bounds"); - for (uint64_t d = diff; d < rank; ++d) { - const uint64_t i = cursor[d]; - appendIndex(d, top, i); - top = 0; - idx[d] = i; + /// Continues a single insertion path, outer to inner. The first + /// argument is the storage-level indices for the value being inserted. + void insPath(const uint64_t *lvlInd, uint64_t diffLvl, uint64_t topIdx, + V val) { + const uint64_t lvlRank = getLvlRank(); + assert(diffLvl <= lvlRank && "Level-diff is out of bounds"); + for (uint64_t l = diffLvl; l < lvlRank; ++l) { + const uint64_t i = lvlInd[l]; + appendIndex(l, topIdx, i); + topIdx = 0; + lvlCursor[l] = i; } values.push_back(val); } - /// Finds the lexicographic differing dimension. - uint64_t lexDiff(const uint64_t *cursor) const { - const uint64_t rank = getRank(); - for (uint64_t r = 0; r < rank; ++r) - if (cursor[r] > idx[r]) - return r; + /// Finds the lexicographically first level where the level-indices + /// in the argument differ from those in the current cursor. 
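  /// For example, with the current cursor at `{1, 5, 2}`, an incoming
  /// `lvlInd` of `{1, 6, 0}` yields 1: the indices agree at level 0 and
  /// first differ (strictly increase) at level 1.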
+ uint64_t lexDiff(const uint64_t *lvlInd) const { + const uint64_t lvlRank = getLvlRank(); + for (uint64_t l = 0; l < lvlRank; ++l) + if (lvlInd[l] > lvlCursor[l]) + return l; else - assert(cursor[r] == idx[r] && "non-lexicographic insertion"); - assert(0 && "duplication insertion"); + assert(lvlInd[l] == lvlCursor[l] && "non-lexicographic insertion"); + assert(0 && "duplicate insertion"); return -1u; } @@ -520,12 +687,12 @@ std::vector> pointers; std::vector> indices; std::vector values; - std::vector idx; // index cursor for lexicographic insertion. + std::vector lvlCursor; // cursor for lexicographic insertion. }; -#undef ASSERT_COMPRESSED_OR_SINGLETON_DIM -#undef ASSERT_COMPRESSED_DIM -#undef ASSERT_VALID_DIM +#undef ASSERT_COMPRESSED_OR_SINGLETON_LVL +#undef ASSERT_COMPRESSED_LVL +#undef ASSERT_VALID_LVL //===----------------------------------------------------------------------===// /// A (higher-order) function object for enumerating the elements of some @@ -549,27 +716,35 @@ /// than simply using this class, is to avoid the cost of virtual-method /// dispatch within the loop-nest. template -class SparseTensorEnumeratorBase { +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] SparseTensorEnumeratorBase { public: - /// Constructs an enumerator with the given permutation for mapping - /// the semantic-ordering of dimensions to the desired target-ordering. + /// Constructs an enumerator which automatically applies the given + /// mapping from the source tensor's dimensions to the desired + /// target tensor dimensions. /// /// Preconditions: - /// * the `tensor` must have the same `V` value type. - /// * `perm` must be valid for `rank`. - SparseTensorEnumeratorBase(const SparseTensorStorageBase &tensor, - uint64_t rank, const uint64_t *perm) - : src(tensor), permsz(src.getRev().size()), reord(getRank()), - cursor(getRank()) { - assert(perm && "Received nullptr for permutation"); - assert(rank == getRank() && "Permutation rank mismatch"); - const auto &rev = src.getRev(); // source-order -> semantic-order - const auto &dimSizes = src.getDimSizes(); // in source storage-order - for (uint64_t s = 0; s < rank; ++s) { // `s` source storage-order - uint64_t t = perm[rev[s]]; // `t` target-order - reord[s] = t; - permsz[t] = dimSizes[s]; - } + /// * the `src` must have the same `V` value type. + /// * `trgSizes` must be valid for `trgRank`. + /// * `src2trg` must be valid for `srcRank`, and must map indices + /// valid for `src.getDimSizes()` to indices valid for `trgSizes`. + /// + /// Asserts: + /// * `trgSizes` must be nonnull and must contain only nonzero sizes. + /// * `srcRank == src.getDimRank()`. + /// * `src2trg` must be nonnull. 
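  ///
  /// The intended usage is the construct-and-enumerate pattern shown in
  /// `SparseTensorStorage::toCOO` above, along the lines of:
  ///
  ///   SparseTensorEnumerator<P, I, V> enumerator(*this, trgRank, trgSizes,
  ///                                              srcRank, src2trg);
  ///   enumerator.forallElements(
  ///       [&coo](const auto &trgInd, V val) { coo->add(trgInd, val); });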
+ SparseTensorEnumeratorBase(const SparseTensorStorageBase &src, + uint64_t trgRank, const uint64_t *trgSizes, + uint64_t srcRank, const uint64_t *src2trg) + : src(src), trgSizes(trgSizes, trgSizes + trgRank), + lvl2trg(src.getLvlRank()), trgCursor(trgRank) { + assert(trgSizes && "Received nullptr for target-sizes"); + assert(src2trg && "Received nullptr for source-to-target mapping"); + assert(srcRank == src.getDimRank() && "Source-rank mismatch"); + for (uint64_t t = 0; t < trgRank; ++t) + assert(trgSizes[t] > 0 && "Target-size zero has trivial storage"); + const auto &lvl2src = src.getLvl2Dim(); + for (uint64_t lvlRank = src.getLvlRank(), l = 0; l < lvlRank; ++l) + lvl2trg[l] = src2trg[lvl2src[l]]; } virtual ~SparseTensorEnumeratorBase() = default; @@ -580,14 +755,18 @@ SparseTensorEnumeratorBase & operator=(const SparseTensorEnumeratorBase &) = delete; - /// Returns the source/target tensor's rank. (The source-rank and - /// target-rank are always equal since we only support permutations. - /// Though once we add support for other dimension mappings, this - /// method will have to be split in two.) - uint64_t getRank() const { return permsz.size(); } + /// Gets the source's dimension-rank. + uint64_t getSrcDimRank() const { return src.getDimRank(); } - /// Returns the target tensor's dimension sizes. - const std::vector &permutedSizes() const { return permsz; } + /// Gets the target's dimension-/level-rank. (This is usually + /// "dimension-rank", though that may coincide with "level-rank" + /// depending on usage.) + uint64_t getTrgRank() const { return trgSizes.size(); } + + /// Gets the target's dimension-/level-sizes. (These are usually + /// "dimensions", though that may coincide with "level-rank" depending + /// on usage.) + const std::vector &getTrgSizes() const { return trgSizes; } /// Enumerates all elements of the source tensor, permutes their /// indices, and passes the permuted element to the callback. @@ -598,25 +777,28 @@ protected: const SparseTensorStorageBase &src; - std::vector permsz; // in target order. - std::vector reord; // source storage-order -> target order. - std::vector cursor; // in target order. + std::vector trgSizes; // in target order. + std::vector lvl2trg; // source-levels -> target-dims/lvls. + std::vector trgCursor; // in target order. }; //===----------------------------------------------------------------------===// template -class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] SparseTensorEnumerator final + : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; using StorageImpl = SparseTensorStorage; public: - /// Constructs an enumerator with the given permutation for mapping - /// the semantic-ordering of dimensions to the desired target-ordering. + /// Constructs an enumerator which automatically applies the given + /// mapping from the source tensor's dimensions to the desired + /// target tensor dimensions. /// - /// Precondition: `perm` must be valid for `rank`. - SparseTensorEnumerator(const StorageImpl &tensor, uint64_t rank, - const uint64_t *perm) - : Base(tensor, rank, perm) {} + /// Preconditions/assertions are as per the `SparseTensorEnumeratorBase` ctor. 
+ SparseTensorEnumerator(const StorageImpl &src, uint64_t trgRank, + const uint64_t *trgSizes, uint64_t srcRank, + const uint64_t *src2trg) + : Base(src, trgRank, trgSizes, srcRank, src2trg) {} ~SparseTensorEnumerator() final = default; @@ -625,46 +807,55 @@ } private: + // TODO: Once we functionalize the mappings, then we'll no longer + // be able to use the current approach of constructing `lvl2trg` in the + // ctor and using it to incrementally fill the `trgCursor` cursor as we + // recurse through `forallElements`. Instead we'll want to incrementally + // fill a `lvlCursor` as we recurse, and then use `src.getLvl2Dim()` + // and `src2trg` to convert it just before yielding to the callback. + // It's probably most efficient to just store the `srcCursor` and + // `trgCursor` buffers in this object, but we may want to benchmark + // that against using `std::calloc` to stack-allocate them instead. + // /// The recursive component of the public `forallElements`. void forallElements(ElementConsumer yield, uint64_t parentPos, - uint64_t d) { + uint64_t l) { // Recover the `` type parameters of `src`. const auto &src = static_cast(this->src); - if (d == Base::getRank()) { + if (l == src.getLvlRank()) { assert(parentPos < src.values.size() && "Value position is out of bounds"); // TODO: - yield(this->cursor, src.values[parentPos]); + yield(this->trgCursor, src.values[parentPos]); return; } - const auto dlt = src.getDimType(d); + uint64_t &cursorL = this->trgCursor[this->lvl2trg[l]]; + const auto dlt = src.getLvlType(l); // Avoid redundant bounds checking. if (isCompressedDLT(dlt)) { - // Look up the bounds of the `d`-level segment determined by the - // `d-1`-level position `parentPos`. - const std::vector
<P>
&pointersD = src.pointers[d]; - assert(parentPos + 1 < pointersD.size() && + // Look up the bounds of the `l`-level segment determined by the + // `(l - 1)`-level position `parentPos`. + const std::vector
<P>
&pointersL = src.pointers[l]; + assert(parentPos + 1 < pointersL.size() && "Parent pointer position is out of bounds"); - const uint64_t pstart = static_cast(pointersD[parentPos]); - const uint64_t pstop = static_cast(pointersD[parentPos + 1]); - // Loop-invariant code for looking up the `d`-level coordinates/indices. - const std::vector &indicesD = src.indices[d]; - assert(pstop <= indicesD.size() && "Index position is out of bounds"); - uint64_t &cursorReordD = this->cursor[this->reord[d]]; + const uint64_t pstart = static_cast(pointersL[parentPos]); + const uint64_t pstop = static_cast(pointersL[parentPos + 1]); + // Loop-invariant code for looking up the `l`-level coordinates/indices. + const std::vector &indicesL = src.indices[l]; + assert(pstop <= indicesL.size() && "Index position is out of bounds"); for (uint64_t pos = pstart; pos < pstop; ++pos) { - cursorReordD = static_cast(indicesD[pos]); - forallElements(yield, pos, d + 1); + cursorL = static_cast(indicesL[pos]); + forallElements(yield, pos, l + 1); } } else if (isSingletonDLT(dlt)) { - this->cursor[this->reord[d]] = src.getIndex(d, parentPos); - forallElements(yield, parentPos, d + 1); - } else { - assert(isDenseDLT(dlt)); // TODO: reuse the ASSERT_DENSE_DIM message - const uint64_t sz = src.getDimSizes()[d]; + cursorL = src.getIndex(l, parentPos); + forallElements(yield, parentPos, l + 1); + } else { // Dense dimension. + ASSERT_DENSE_DLT(dlt); + const uint64_t sz = src.getLvlSizes()[l]; const uint64_t pstart = parentPos * sz; - uint64_t &cursorReordD = this->cursor[this->reord[d]]; for (uint64_t i = 0; i < sz; ++i) { - cursorReordD = i; - forallElements(yield, pstart + i, d + 1); + cursorL = i; + forallElements(yield, pstart + i, l + 1); } } } @@ -678,31 +869,39 @@ /// N.B., this class stores references to the parameters passed to /// the constructor; thus, objects of this class must not outlive /// those parameters. -class SparseTensorNNZ final { +/// +/// This class does not have the "dimension" vs "level" distinction, but +/// since it is used for initializing the levels of a `SparseTensorStorage` +/// object, we use the "level" name throughout for the sake of consistency. +class MLIR_SPARSETENSOR_GSL_POINTER [[nodiscard]] SparseTensorNNZ final { public: - /// Allocates the statistics structure for the desired sizes and - /// sparsity (in the target tensor's storage-order). This constructor - /// does not actually populate the statistics, however; for that see - /// `initialize`. + /// Allocates the statistics structure for the desired target-tensor + /// level structure (i.e., sizes and types). This constructor does not + /// actually populate the statistics, however; for that see `initialize`. /// - /// Precondition: `dimSizes` must not contain zeros. - SparseTensorNNZ(const std::vector &dimSizes, - const std::vector &sparsity); + /// Precondition: `lvlSizes` must not contain zeros. + /// Asserts: `lvlSizes.size() == lvlTypes.size()`. + SparseTensorNNZ(const std::vector &lvlSizes, + const std::vector &lvlTypes); // We disallow copying to help avoid leaking the stored references. SparseTensorNNZ(const SparseTensorNNZ &) = delete; SparseTensorNNZ &operator=(const SparseTensorNNZ &) = delete; - /// Returns the rank of the target tensor. - uint64_t getRank() const { return dimSizes.size(); } + /// Gets the target-tensor's level-rank. + uint64_t getLvlRank() const { return lvlSizes.size(); } - /// Enumerates the source tensor to fill in the statistics. 
The - /// enumerator should already incorporate the permutation (from - /// semantic-order to the target storage-order). + /// Enumerates the source tensor to fill in the statistics. + /// The enumerator should already incorporate the mapping from + /// the source tensor-dimensions to the target storage-levels. + /// + /// Asserts: + /// * `enumerator.getTrgRank() == getLvlRank()`. + /// * `enumerator.getTrgSizes() == lvlSizes`. template void initialize(SparseTensorEnumeratorBase &enumerator) { - assert(enumerator.getRank() == getRank() && "Tensor rank mismatch"); - assert(enumerator.permutedSizes() == dimSizes && "Tensor size mismatch"); + assert(enumerator.getTrgRank() == getLvlRank() && "Tensor rank mismatch"); + assert(enumerator.getTrgSizes() == lvlSizes && "Tensor size mismatch"); enumerator.forallElements( [this](const std::vector &ind, V) { add(ind); }); } @@ -710,86 +909,90 @@ /// The type of callback functions which receive an nnz-statistic. using NNZConsumer = const std::function &; - /// Lexicographically enumerates all indicies for dimensions strictly - /// less than `stopDim`, and passes their nnz statistic to the callback. + /// Lexicographically enumerates all indicies for levels strictly + /// less than `stopLvl`, and passes their nnz statistic to the callback. /// Since our use-case only requires the statistic not the coordinates /// themselves, we do not bother to construct those coordinates. - void forallIndices(uint64_t stopDim, NNZConsumer yield) const; + void forallIndices(uint64_t stopLvl, NNZConsumer yield) const; private: /// Adds a new element (i.e., increment its statistics). We use /// a method rather than inlining into the lambda in `initialize`, /// to avoid spurious templating over `V`. And this method is private - /// to avoid needing to re-assert validity of `ind` (which is guaranteed - /// by `forallElements`). - void add(const std::vector &ind); + /// to avoid needing to re-assert validity of `lvlInd` (which is + /// guaranteed by `forallElements`). + void add(const std::vector &lvlInd); /// Recursive component of the public `forallIndices`. - void forallIndices(NNZConsumer yield, uint64_t stopDim, uint64_t parentPos, - uint64_t d) const; + void forallIndices(NNZConsumer yield, uint64_t stopLvl, uint64_t parentPos, + uint64_t l) const; // All of these are in the target storage-order. - const std::vector &dimSizes; - const std::vector &dimTypes; + const std::vector &lvlSizes; + const std::vector &lvlTypes; std::vector> nnz; }; //===----------------------------------------------------------------------===// // Definitions of the ctors and factories of `SparseTensorStorage`. -namespace detail { -/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping -/// semantic-order to target-order) are a refinement of the desired `shape` -/// (in semantic-order). -/// -/// Precondition: `perm` and `shape` must be valid for `rank`. 
-void assertPermutedSizesMatchShape(const std::vector &dimSizes, - uint64_t rank, const uint64_t *perm, - const uint64_t *shape); -} // namespace detail - template -SparseTensorStorage *SparseTensorStorage::newSparseTensor( - uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo) { - if (coo) { - const auto &coosz = coo->getDimSizes(); -#ifndef NDEBUG - detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape); -#endif - return new SparseTensorStorage(coosz, perm, sparsity, coo); +SparseTensorStorage *SparseTensorStorage::newFromCOO( + uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO) { + assert(dimShape && "Got nullptr for dimension shape"); + assert(lvl2dim && "Got nullptr for level-to-dimension mapping"); + const auto &lvlSizes = lvlCOO.getDimSizes(); + assert(lvlRank == lvlSizes.size() && "Level-rank mismatch"); + // Must reconstruct `dimSizes` from `lvlSizes`. While this is easy + // enough to do when `lvl2dim` is a permutation, this approach will + // not work for more general mappings; so we will need to move this + // computation off to codegen. + std::vector dimSizes(dimRank); + for (uint64_t l = 0; l < lvlRank; ++l) { + const uint64_t d = lvl2dim[l]; + const uint64_t sz = dimShape[d]; + assert((sz == 0 || sz == lvlSizes[l]) && + "Dimension sizes do not match expected shape"); + dimSizes[d] = lvlSizes[l]; } - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; ++r) { - assert(shape[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = shape[r]; - } - // We pass the null `coo` to ensure we select the intended constructor. - return new SparseTensorStorage(permsz, perm, sparsity, coo); + return new SparseTensorStorage(dimRank, dimSizes.data(), lvlRank, + lvlTypes, lvl2dim, lvlCOO); } template -SparseTensorStorage *SparseTensorStorage::newSparseTensor( - uint64_t rank, const uint64_t *shape, const uint64_t *perm, - const DimLevelType *sparsity, const SparseTensorStorageBase *source) { - assert(source && "Got nullptr for source"); - SparseTensorEnumeratorBase *enumerator; - source->newEnumerator(&enumerator, rank, perm); - const auto &permsz = enumerator->permutedSizes(); +SparseTensorStorage *SparseTensorStorage::newFromSparseTensor( + uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, uint64_t srcRank, const uint64_t *src2lvl, + const SparseTensorStorageBase &source) { + // Verify that the `source` dimensions match the expected `dimShape`. 
+ assert(dimShape && "Got nullptr for dimension shape"); + assert(dimRank == source.getDimRank() && "Dimension-rank mismatch"); + const auto &dimSizes = source.getDimSizes(); #ifndef NDEBUG - detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape); + for (uint64_t d = 0; d < dimRank; ++d) { + const uint64_t sz = dimShape[d]; + assert((sz == 0 || sz == dimSizes[d]) && + "Dimension-sizes do not match expected shape"); + } #endif - auto *tensor = - new SparseTensorStorage(permsz, perm, sparsity, *source); - delete enumerator; + SparseTensorEnumeratorBase *lvlEnumerator; + source.newEnumerator(&lvlEnumerator, lvlRank, lvlSizes, srcRank, src2lvl); + auto *tensor = new SparseTensorStorage( + dimRank, dimSizes.data(), lvlRank, lvlTypes, lvl2dim, *lvlEnumerator); + delete lvlEnumerator; return tensor; } template SparseTensorStorage::SparseTensorStorage( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity, SparseTensorCOO *coo) - : SparseTensorStorage(dimSizes, perm, sparsity) { + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim, bool initializeValuesIfAllDense) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, + lvl2dim) { // Provide hints on capacity of pointers and indices. // TODO: needs much fine-tuning based on actual sparsity; currently // we reserve pointer/index space based on all previous dense @@ -797,139 +1000,151 @@ // we should really use nnz and dense/sparse distribution. bool allDense = true; uint64_t sz = 1; - for (uint64_t r = 0, rank = getRank(); r < rank; ++r) { - if (isCompressedDim(r)) { - // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply + for (uint64_t l = 0; l < lvlRank; ++l) { + const DimLevelType dlt = lvlTypes[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + // TODO: Take a parameter between 1 and `lvlSizes[l]`, and multiply // `sz` by that before reserving. (For now we just use 1.) - pointers[r].reserve(sz + 1); - pointers[r].push_back(0); - indices[r].reserve(sz); + pointers[l].reserve(sz + 1); + pointers[l].push_back(0); + indices[l].reserve(sz); sz = 1; allDense = false; - } else if (isSingletonDim(r)) { - indices[r].reserve(sz); + } else if (isSingletonDLT(dlt)) { + indices[l].reserve(sz); sz = 1; allDense = false; } else { // Dense dimension. - ASSERT_DENSE_DIM(r); - sz = detail::checkedMul(sz, getDimSizes()[r]); + ASSERT_DENSE_DLT(dlt); + sz = detail::checkedMul(sz, lvlSizes[l]); } } - // Then assign contents from coordinate scheme tensor if provided. - if (coo) { - // Ensure both preconditions of `fromCOO`. - assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch"); - coo->sort(); - // Now actually insert the `elements`. - const std::vector> &elements = coo->getElements(); - uint64_t nnz = elements.size(); - values.reserve(nnz); - fromCOO(elements, 0, nnz, 0); - } else if (allDense) { + if (allDense && initializeValuesIfAllDense) values.resize(sz, 0); - } +} + +template +SparseTensorStorage::SparseTensorStorage( // NOLINT + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorCOO &lvlCOO) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, + lvlCOO.getDimSizes().data(), lvlTypes, lvl2dim, + false) { + assert(lvlRank == lvlCOO.getDimSizes().size() && "Level-rank mismatch"); + // Ensure the preconditions of `fromCOO`. 
(One is already ensured by + // using `lvlSizes = lvlCOO.getDimSizes()` in the ctor above.) + lvlCOO.sort(); + // Now actually insert the `elements`. + const auto &elements = lvlCOO.getElements(); + uint64_t nnz = elements.size(); + values.reserve(nnz); + fromCOO(elements, 0, nnz, 0); } template SparseTensorStorage::SparseTensorStorage( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity, const SparseTensorStorageBase &tensor) - : SparseTensorStorage(dimSizes, perm, sparsity) { - SparseTensorEnumeratorBase *enumerator; - tensor.newEnumerator(&enumerator, getRank(), perm); + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const DimLevelType *lvlTypes, const uint64_t *lvl2dim, + SparseTensorEnumeratorBase &lvlEnumerator) + : SparseTensorStorage(dimRank, dimSizes, lvlRank, + lvlEnumerator.getTrgSizes().data(), lvlTypes, + lvl2dim) { + assert(lvlRank == lvlEnumerator.getTrgRank() && "Level-rank mismatch"); { // Initialize the statistics structure. - SparseTensorNNZ nnz(getDimSizes(), getDimTypes()); - nnz.initialize(*enumerator); + SparseTensorNNZ nnz(getLvlSizes(), getLvlTypes()); + nnz.initialize(lvlEnumerator); // Initialize "pointers" overhead (and allocate "indices", "values"). - uint64_t parentSz = 1; // assembled-size (not dimension-size) of `r-1`. - for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDim(r)) { - pointers[r].reserve(parentSz + 1); - pointers[r].push_back(0); + uint64_t parentSz = 1; // assembled-size of the `(l - 1)`-level. + for (uint64_t l = 0; l < lvlRank; ++l) { + const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + pointers[l].reserve(parentSz + 1); + pointers[l].push_back(0); uint64_t currentPos = 0; - nnz.forallIndices(r, [this, ¤tPos, r](uint64_t n) { + nnz.forallIndices(l, [this, ¤tPos, l](uint64_t n) { currentPos += n; - appendPointer(r, currentPos); + appendPointer(l, currentPos); }); - assert(pointers[r].size() == parentSz + 1 && + assert(pointers[l].size() == parentSz + 1 && "Final pointers size doesn't match allocated size"); - // That assertion entails `assembledSize(parentSz, r)` - // is now in a valid state. That is, `pointers[r][parentSz]` + // That assertion entails `assembledSize(parentSz, l)` + // is now in a valid state. That is, `pointers[l][parentSz]` // equals the present value of `currentPos`, which is the - // correct assembled-size for `indices[r]`. + // correct assembled-size for `indices[l]`. } // Update assembled-size for the next iteration. - parentSz = assembledSize(parentSz, r); - // Ideally we need only `indices[r].reserve(parentSz)`, however + parentSz = assembledSize(parentSz, l); + // Ideally we need only `indices[l].reserve(parentSz)`, however // the `std::vector` implementation forces us to initialize it too. // That is, in the yieldPos loop we need random-access assignment - // to `indices[r]`; however, `std::vector`'s subscript-assignment + // to `indices[l]`; however, `std::vector`'s subscript-assignment // only allows assigning to already-initialized positions. - if (isCompressedDim(r) || isSingletonDim(r)) - indices[r].resize(parentSz, 0); + if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) + indices[l].resize(parentSz, 0); else - ASSERT_DENSE_DIM(r); // Future-proofing. + ASSERT_DENSE_DLT(dlt); // Future-proofing. } values.resize(parentSz, 0); // Both allocate and zero-initialize. 
} // The yieldPos loop - enumerator->forallElements([this](const std::vector &ind, V val) { + lvlEnumerator.forallElements([this](const auto &lvlInd, V val) { uint64_t parentSz = 1, parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDim(r)) { + for (uint64_t lvlRank = getLvlRank(), l = 0; l < lvlRank; ++l) { + const auto dlt = getLvlTypes()[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { // If `parentPos == parentSz` then it's valid as an array-lookup; // however, it's semantically invalid here since that entry - // does not represent a segment of `indices[r]`. Moreover, that + // does not represent a segment of `indices[l]`. Moreover, that // entry must be immutable for `assembledSize` to remain valid. assert(parentPos < parentSz && "Pointers position is out of bounds"); - const uint64_t currentPos = pointers[r][parentPos]; + const uint64_t currentPos = pointers[l][parentPos]; // This increment won't overflow the `P` type, since it can't - // exceed the original value of `pointers[r][parentPos+1]` + // exceed the original value of `pointers[l][parentPos+1]` // which was already verified to be within bounds for `P` // when it was written to the array. - pointers[r][parentPos]++; - writeIndex(r, currentPos, ind[r]); + pointers[l][parentPos]++; + writeIndex(l, currentPos, lvlInd[l]); parentPos = currentPos; - } else if (isSingletonDim(r)) { - writeIndex(r, parentPos, ind[r]); + } else if (isSingletonDLT(dlt)) { + writeIndex(l, parentPos, lvlInd[l]); // the new parentPos equals the old parentPos. } else { // Dense dimension. - ASSERT_DENSE_DIM(r); - parentPos = parentPos * getDimSizes()[r] + ind[r]; + ASSERT_DENSE_DLT(dlt); + parentPos = parentPos * getLvlSizes()[l] + lvlInd[l]; } - parentSz = assembledSize(parentSz, r); + parentSz = assembledSize(parentSz, l); } assert(parentPos < values.size() && "Value position is out of bounds"); values[parentPos] = val; }); - // No longer need the enumerator, so we'll delete it ASAP. - delete enumerator; // The finalizeYieldPos loop - for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDim(r)) { - assert(parentSz == pointers[r].size() - 1 && + for (uint64_t parentSz = 1, l = 0; l < lvlRank; ++l) { + const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. + if (isCompressedDLT(dlt)) { + assert(parentSz == pointers[l].size() - 1 && "Actual pointers size doesn't match the expected size"); // Can't check all of them, but at least we can check the last one. - assert(pointers[r][parentSz - 1] == pointers[r][parentSz] && + assert(pointers[l][parentSz - 1] == pointers[l][parentSz] && "Pointers got corrupted"); // TODO: optimize this by using `memmove` or similar. for (uint64_t n = 0; n < parentSz; ++n) { const uint64_t parentPos = parentSz - n; - pointers[r][parentPos] = pointers[r][parentPos - 1]; + pointers[l][parentPos] = pointers[l][parentPos - 1]; } - pointers[r][0] = 0; + pointers[l][0] = 0; } else { // Both dense and singleton are no-ops for the finalizeYieldPos loop. // This assertion is for future-proofing. 
- assert((isDenseDim(r) || isSingletonDim(r)) && - "Dimension is neither dense nor singleton"); + assert((isDenseDLT(dlt) || isSingletonDLT(dlt)) && + "Level is neither dense nor singleton"); } - parentSz = assembledSize(parentSz, r); + parentSz = assembledSize(parentSz, l); } } -#undef ASSERT_DENSE_DIM +#undef ASSERT_DENSE_DLT } // namespace sparse_tensor } // namespace mlir diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h @@ -56,12 +56,16 @@ /// kSparseToSparse STS STS, copied from the STS source /// kToIterator STS Iterator, call @getNext to use and /// @delSparseTensorIterator to free. -MLIR_CRUNNERUTILS_EXPORT void * -_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT - StridedMemRefType *sref, - StridedMemRefType *pref, - OverheadType ptrTp, OverheadType indTp, - PrimaryType valTp, Action action, void *ptr); +MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensor( // NOLINT + StridedMemRefType *dimSizesRef, + StridedMemRefType *lvlSizesRef, + StridedMemRefType *lvlTypesRef, + StridedMemRefType *lvl2dimRef, + StridedMemRefType *dim2lvlRef, OverheadType ptrTp, + OverheadType indTp, PrimaryType valTp, Action action, void *ptr); + +// TODO: document what all the arguments are/mean for the functions below, +// especially with regards to "dim"-vs-"lvl" and mappings/permutations. /// Tensor-storage method to obtain direct access to the values array. #define DECL_SPARSEVALUES(VNAME, V) \ @@ -130,8 +134,8 @@ // //===----------------------------------------------------------------------===// -/// Tensor-storage method to get the size of the given dimension. -MLIR_CRUNNERUTILS_EXPORT index_type sparseDimSize(void *tensor, index_type d); +/// Tensor-storage method to get the size of the given level. +MLIR_CRUNNERUTILS_EXPORT index_type sparseLvlSize(void *tensor, index_type l); /// Tensor-storage method to finalize lexicographic insertions. MLIR_CRUNNERUTILS_EXPORT void endInsert(void *tensor); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -57,14 +57,14 @@ operands); } -/// Generates dimension size call. -static Value genDimSizeCall(OpBuilder &builder, Location loc, +/// Generates call to lookup a level-size. +static Value genLvlSizeCall(OpBuilder &builder, Location loc, SparseTensorEncodingAttr &enc, Value src, - uint64_t idx) { + uint64_t lvl) { // Generate the call. - StringRef name = "sparseDimSize"; + StringRef name = "sparseLvlSize"; SmallVector params{ - src, constantIndex(builder, loc, toStoredDim(enc, idx))}; + src, constantIndex(builder, loc, toStoredDim(enc, lvl))}; Type iTp = builder.getIndexType(); return createFuncCall(builder, loc, name, iTp, params, EmitCInterface::Off) .getResult(0); @@ -72,13 +72,22 @@ /// Compute the size from type (for static sizes) or from an already-converted /// opaque pointer source (for dynamic sizes) at the given dimension. +// +// FIXME: Need to rename this function to match `genLvlSizeCall` and hence +// match the naming convention used in the runtime library. 
However, it's +// not entirely clear that all callsites of this function properly make the +// "level"-vs-"dimension" distinction; so need to audit each callsite to +// ensure this still does what they mean (possibly by having two separate +// functions, one for levels and one for dimensions). That also means +// renaming `sizesFromPtr`, `sizesFromType`, etc, to make clear whether +// they mean to be referring to level-sizes vs dimension-sizes. static Value sizeFromPtrAtDim(OpBuilder &builder, Location loc, SparseTensorEncodingAttr &enc, ShapedType stp, - Value src, unsigned dim) { + Value src, unsigned i) { auto shape = stp.getShape(); - if (shape[dim] == ShapedType::kDynamicSize) - return genDimSizeCall(builder, loc, enc, src, dim); - return constantIndex(builder, loc, shape[dim]); + if (shape[i] == ShapedType::kDynamicSize) + return genLvlSizeCall(builder, loc, enc, src, i); + return constantIndex(builder, loc, shape[i]); } /// Populates given sizes array from type (for static sizes) and from @@ -225,17 +234,19 @@ } private: - static constexpr unsigned kNumStaticParams = 6; + static constexpr unsigned kNumStaticParams = 8; static constexpr unsigned kNumDynamicParams = 2; static constexpr unsigned kNumParams = kNumStaticParams + kNumDynamicParams; - static constexpr unsigned kParamLvlTypes = 0; - static constexpr unsigned kParamDimSizes = 1; - static constexpr unsigned kParamDim2Lvl = 2; - static constexpr unsigned kParamPtrTp = 3; - static constexpr unsigned kParamIndTp = 4; - static constexpr unsigned kParamValTp = 5; - static constexpr unsigned kParamAction = 6; - static constexpr unsigned kParamPtr = 7; + static constexpr unsigned kParamDimSizes = 0; + static constexpr unsigned kParamLvlSizes = 1; + static constexpr unsigned kParamLvlTypes = 2; + static constexpr unsigned kParamLvl2Dim = 3; + static constexpr unsigned kParamDim2Lvl = 4; + static constexpr unsigned kParamPtrTp = 5; + static constexpr unsigned kParamIndTp = 6; + static constexpr unsigned kParamValTp = 7; + static constexpr unsigned kParamAction = 8; + static constexpr unsigned kParamPtr = 9; OpBuilder &builder; Location loc; @@ -260,10 +271,17 @@ // verification of external data, or for construction of internal data. assert(dimSizes.size() == dimRank && "Dimension-rank mismatch"); params[kParamDimSizes] = genBuffer(builder, loc, dimSizes); - // The dimension-to-level mapping. We must preinitialize `dim2lvl` - // so that the true branch below can perform random-access `operator[]` - // assignment. + // The level-sizes array must be passed as well, since for arbitrary + // dim2lvl mappings it cannot be trivially reconstructed at runtime. + // For now however, since we're still assuming permutations, we will + // initialize this parameter alongside the `dim2lvl` and `lvl2dim` + // parameters below. We preinitialize `lvlSizes` for code symmetry. + SmallVector lvlSizes(lvlRank); + // The dimension-to-level mapping and its inverse. We must preinitialize + // `dim2lvl` so that the true branch below can perform random-access + // `operator[]` assignment. We preinitialize `lvl2dim` for code symmetry. SmallVector dim2lvl(dimRank); + SmallVector lvl2dim(lvlRank); auto dimOrder = enc.getDimOrdering(); if (dimOrder) { assert(dimOrder.isPermutation()); @@ -271,13 +289,20 @@ // The `d`th source variable occurs in the `l`th result position. 
uint64_t d = dimOrder.getDimPosition(l); dim2lvl[d] = constantIndex(builder, loc, l); + lvl2dim[l] = constantIndex(builder, loc, d); + lvlSizes[l] = dimSizes[d]; } } else { assert(dimRank == lvlRank && "Rank mismatch"); - for (unsigned i = 0; i < lvlRank; i++) - dim2lvl[i] = constantIndex(builder, loc, i); + for (unsigned i = 0; i < lvlRank; i++) { + dim2lvl[i] = lvl2dim[i] = constantIndex(builder, loc, i); + lvlSizes[i] = dimSizes[i]; + } } - params[kParamDim2Lvl] = genBuffer(builder, loc, dim2lvl); + params[kParamLvlSizes] = genBuffer(builder, loc, lvlSizes); + params[kParamLvl2Dim] = genBuffer(builder, loc, lvl2dim); + params[kParamDim2Lvl] = + dimOrder ? genBuffer(builder, loc, dim2lvl) : params[kParamLvl2Dim]; // Secondary and primary types encoding. setTemplateTypes(enc, stp); // Finally, make note that initialization is complete. @@ -316,9 +341,10 @@ /// if val != 0 /// t->add(&val, [i1,..,ik], [p1,..,pk]); static void genAddEltCall(OpBuilder &builder, Location loc, Type eltType, - Value ptr, Value valPtr, Value ind, Value perm) { + Value lvlCOO, Value valPtr, Value dimInd, + Value dim2lvl) { SmallString<9> name{"addElt", primaryTypeFunctionSuffix(eltType)}; - SmallVector params{ptr, valPtr, ind, perm}; + SmallVector params{lvlCOO, valPtr, dimInd, dim2lvl}; Type pTp = getOpaquePointerType(builder); createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On); } @@ -637,7 +663,7 @@ Value src = adaptor.getOperands()[0]; int64_t idx = *index; rewriter.replaceOp(op, - genDimSizeCall(rewriter, op->getLoc(), enc, src, idx)); + genLvlSizeCall(rewriter, op->getLoc(), enc, src, idx)); return success(); } }; diff --git a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt --- a/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt @@ -8,6 +8,7 @@ add_mlir_library(MLIRSparseTensorRuntime File.cpp NNZ.cpp + PermutationRef.cpp Storage.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp @@ -47,18 +47,24 @@ } } -// TODO(wrengr/bixia): figure out how to reorganize the element-parsing -// loop of `openSparseTensorCOO` into methods of this class, so we can -// avoid leaking access to the `line` pointer (both for general hygiene -// and because we can't mark it const due to the second argument of -// `strtoul`/`strtoud` being `char * *restrict` rather than -// `char const* *restrict`). -// /// Attempts to read a line from the file. -char *SparseTensorReader::readLine() { - if (fgets(line, kColWidth, file)) - return line; - MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); +void SparseTensorReader::readLine() { + if (!fgets(line, kColWidth, file)) + MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); +} + +char *SparseTensorReader::readCOOIndices(uint64_t rank, uint64_t *indices) { + assert(rank == getRank() && "Rank mismatch"); + readLine(); + // Local variable for tracking the parser's position in the `line` buffer. + char *linePtr = line; + for (uint64_t r = 0; r < rank; ++r) { + // Parse the 1-based index. + uint64_t idx = strtoul(linePtr, &linePtr, 10); + // Store the 0-based index. + indices[r] = idx - 1; + } + return linePtr; } /// Reads and parses the file's header. 
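For readers of the File.cpp hunk above: the new `readCOOIndices` method factors the per-line parsing of 1-based indices (converted to 0-based) out of `openSparseTensorCOO`, and returns a pointer positioned just past the indices so the caller can parse the value next. The following is a minimal sketch, not part of the patch, of how an element-reading loop might use it; the `getNNZ()` accessor and the `double`-valued COO are illustrative assumptions.

#include <cstdint>
#include <cstdlib>
#include <vector>

static void readAllElements(SparseTensorReader &reader,
                            SparseTensorCOO<double> &coo) {
  const uint64_t rank = reader.getRank(); // rank recorded in the file header
  const uint64_t nnz = reader.getNNZ();   // number of stored entries (assumed accessor)
  std::vector<uint64_t> indices(rank);
  for (uint64_t k = 0; k < nnz; ++k) {
    // Reads one line, stores the 0-based indices, and returns a pointer
    // positioned just past them in the line buffer.
    char *linePtr = reader.readCOOIndices(rank, indices.data());
    const double value = strtod(linePtr, &linePtr); // parse the element value
    coo.add(indices, value);
  }
}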
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp @@ -21,27 +21,21 @@ using namespace mlir::sparse_tensor; //===----------------------------------------------------------------------===// -/// Allocate the statistics structure for the desired sizes and -/// sparsity (in the target tensor's storage-order). This constructor -/// does not actually populate the statistics, however; for that see -/// `initialize`. -/// -/// Precondition: `dimSizes` must not contain zeros. -SparseTensorNNZ::SparseTensorNNZ(const std::vector &dimSizes, - const std::vector &sparsity) - : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) { - assert(dimSizes.size() == dimTypes.size() && "Rank mismatch"); +SparseTensorNNZ::SparseTensorNNZ(const std::vector &lvlSizes, + const std::vector &lvlTypes) + : lvlSizes(lvlSizes), lvlTypes(lvlTypes), nnz(getLvlRank()) { + assert(lvlSizes.size() == lvlTypes.size() && "Rank mismatch"); bool alreadyCompressed = false; (void)alreadyCompressed; - uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`. - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { - const DimLevelType dlt = sparsity[r]; + uint64_t sz = 1; // the product of all `lvlSizes` strictly less than `l`. + for (uint64_t l = 0, lvlrank = getLvlRank(); l < lvlrank; ++l) { + const DimLevelType dlt = lvlTypes[l]; if (isCompressedDLT(dlt)) { if (alreadyCompressed) MLIR_SPARSETENSOR_FATAL( - "Multiple compressed layers not currently supported"); + "Multiple compressed levels not currently supported"); alreadyCompressed = true; - nnz[r].resize(sz, 0); // Both allocate and zero-initialize. + nnz[l].resize(sz, 0); // Both allocate and zero-initialize. } else if (isDenseDLT(dlt)) { if (alreadyCompressed) MLIR_SPARSETENSOR_FATAL( @@ -52,51 +46,41 @@ // when adding support for multiple compressed dimensions or // for dense-after-compressed. } else { - MLIR_SPARSETENSOR_FATAL("unsupported dimension level type: %d\n", + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", static_cast(dlt)); } - sz = detail::checkedMul(sz, dimSizes[r]); + sz = detail::checkedMul(sz, lvlSizes[l]); } } -/// Lexicographically enumerates all indicies for dimensions strictly -/// less than `stopDim`, and passes their nnz statistic to the callback. -/// Since our use-case only requires the statistic not the coordinates -/// themselves, we do not bother to construct those coordinates. -void SparseTensorNNZ::forallIndices(uint64_t stopDim, +void SparseTensorNNZ::forallIndices(uint64_t stopLvl, SparseTensorNNZ::NNZConsumer yield) const { - assert(stopDim < getRank() && "Dimension out of bounds"); - assert(isCompressedDLT(dimTypes[stopDim]) && - "Cannot look up non-compressed dimensions"); - forallIndices(yield, stopDim, 0, 0); + assert(stopLvl < getLvlRank() && "Level out of bounds"); + assert(isCompressedDLT(lvlTypes[stopLvl]) && + "Cannot look up non-compressed levels"); + forallIndices(yield, stopLvl, 0, 0); } -/// Adds a new element (i.e., increment its statistics). We use -/// a method rather than inlining into the lambda in `initialize`, -/// to avoid spurious templating over `V`. And this method is private -/// to avoid needing to re-assert validity of `ind` (which is guaranteed -/// by `forallElements`). 
-void SparseTensorNNZ::add(const std::vector &ind) { +void SparseTensorNNZ::add(const std::vector &lvlInd) { uint64_t parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { - if (isCompressedDLT(dimTypes[r])) - nnz[r][parentPos]++; - parentPos = parentPos * dimSizes[r] + ind[r]; + for (uint64_t l = 0, lvlrank = getLvlRank(); l < lvlrank; ++l) { + if (isCompressedDLT(lvlTypes[l])) + nnz[l][parentPos]++; + parentPos = parentPos * lvlSizes[l] + lvlInd[l]; } } -/// Recursive component of the public `forallIndices`. void SparseTensorNNZ::forallIndices(SparseTensorNNZ::NNZConsumer yield, - uint64_t stopDim, uint64_t parentPos, - uint64_t d) const { - assert(d <= stopDim); - if (d == stopDim) { - assert(parentPos < nnz[d].size() && "Cursor is out of range"); - yield(nnz[d][parentPos]); + uint64_t stopLvl, uint64_t parentPos, + uint64_t l) const { + assert(l <= stopLvl); + if (l == stopLvl) { + assert(parentPos < nnz[l].size() && "Cursor is out of range"); + yield(nnz[l][parentPos]); } else { - const uint64_t sz = dimSizes[d]; + const uint64_t sz = lvlSizes[l]; const uint64_t pstart = parentPos * sz; - for (uint64_t i = 0; i < sz; i++) - forallIndices(yield, stopDim, pstart + i, d + 1); + for (uint64_t i = 0; i < sz; ++i) + forallIndices(yield, stopLvl, pstart + i, l + 1); } } diff --git a/mlir/lib/ExecutionEngine/SparseTensor/PermutationRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/PermutationRef.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/ExecutionEngine/SparseTensor/PermutationRef.cpp @@ -0,0 +1,38 @@ +//===- PermutationRef.cpp - Permutation reference wrapper -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" + +#include +#include +#include + +bool mlir::sparse_tensor::detail::isPermutation(uint64_t size, + const uint64_t *perm) { + assert(perm && "Got nullptr for permutation"); + // TODO: If we ever depend on LLVMSupport, then use `llvm::BitVector` instead. + std::vector seen(size, false); + for (uint64_t i = 0; i < size; ++i) { + const uint64_t j = perm[i]; + if (j >= size || seen[j]) + return false; + seen[j] = true; + } + for (uint64_t i = 0; i < size; ++i) + if (!seen[i]) + return false; + return true; +} + +std::vector +mlir::sparse_tensor::detail::PermutationRef::inverse() const { + std::vector out(permSize); + for (uint64_t i = 0; i < permSize; ++i) + out[perm[i]] = i; + return out; +} diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp @@ -12,9 +12,6 @@ // no benefit). Though this also helps ensure that we avoid weak-vtables: // // -// (This file also contains the definition of `assertPermutedSizesMatchShape` -// which is used by `SparseTensorStorage` factories.) -// // This file is part of the lightweight runtime support library for sparse // tensor manipulations. 
The functionality of the support library is meant // to simplify benchmarking, testing, and debugging MLIR code operating on @@ -27,25 +24,39 @@ using namespace mlir::sparse_tensor; -SparseTensorStorageBase::SparseTensorStorageBase( - const std::vector &dimSizes, const uint64_t *perm, - const DimLevelType *sparsity) - : dimSizes(dimSizes), rev(getRank()), - dimTypes(sparsity, sparsity + getRank()) { - assert(perm && "Got nullptr for permutation"); - assert(sparsity && "Got nullptr for sparsity"); - const uint64_t rank = getRank(); - // Validate parameters. - assert(rank > 0 && "Trivial shape is unsupported"); - for (uint64_t r = 0; r < rank; ++r) { - assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - assert((isDenseDim(r) || isCompressedDim(r) || isSingletonDim(r)) && - "Unsupported DimLevelType"); +SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT + uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, + const uint64_t *lvlSizes, const DimLevelType *lvlTypes, + const uint64_t *lvl2dim) + : dimSizes(dimSizes, dimSizes + dimRank), + lvlSizes(lvlSizes, lvlSizes + lvlRank), + lvlTypes(lvlTypes, lvlTypes + lvlRank), + lvl2dim(lvl2dim, lvl2dim + lvlRank) { + // TODO: If we do get any nullptrs, I'm pretty sure these assertions + // will run too late (i.e., after copying things into vectors above). + // But since those fields are const I'm not sure there's any clean way + // to assert things before copying... + assert(dimSizes && "Got nullptr for dimension sizes"); + assert(lvlSizes && "Got nullptr for level sizes"); + assert(lvlTypes && "Got nullptr for level types"); + assert(lvl2dim && "Got nullptr for level-to-dimension mapping"); + // Validate dim-indexed parameters. + assert(dimRank > 0 && "Trivial shape is unsupported"); + for (uint64_t d = 0; d < dimRank; ++d) + assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage"); + // Validate level-indexed parameters. + assert(lvlRank > 0 && "Trivial shape is unsupported"); + for (uint64_t l = 0; l < lvlRank; ++l) { + assert(lvlSizes[l] > 0 && "Level size zero has trivial storage"); + const auto dlt = lvlTypes[l]; // Avoid redundant bounds checking. + // We use `MLIR_SPARSETENSOR_FATAL` here instead of `assert` so that + // when this ctor is successful then all the methods can rely on the + // fact that each level-type satisfies one of these options (even + // when `NDEBUG` is true), thereby reducing the need to re-assert things. + if (!(isDenseDLT(dlt) || isCompressedDLT(dlt) || isSingletonDLT(dlt))) + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", + static_cast(dlt)); } - // Construct the "reverse" (i.e., inverse) permutation. - // TODO: should move this computation off to the codegen - for (uint64_t r = 0; r < rank; ++r) - rev[perm[r]] = r; } // Helper macro for generating error messages when some @@ -56,7 +67,8 @@ #define IMPL_NEWENUMERATOR(VNAME, V) \ void SparseTensorStorageBase::newEnumerator( \ - SparseTensorEnumeratorBase **, uint64_t, const uint64_t *) const { \ + SparseTensorEnumeratorBase **, uint64_t, const uint64_t *, uint64_t, \ + const uint64_t *) const { \ FATAL_PIV("newEnumerator" #VNAME); \ } MLIR_SPARSETENSOR_FOREVERY_V(IMPL_NEWENUMERATOR) @@ -99,19 +111,3 @@ #undef IMPL_EXPINSERT #undef FATAL_PIV - -// TODO: try to unify this with `SparseTensorReader::assertMatchesShape` -// (which is used by `openSparseTensorCOO`). 
It's easy enough to resolve -// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier -// to resolve the presence/absence of `perm` (without introducing extra -// overhead), so perhaps the code duplication is unavoidable? -void mlir::sparse_tensor::detail::assertPermutedSizesMatchShape( - const std::vector &dimSizes, uint64_t rank, const uint64_t *perm, - const uint64_t *shape) { - assert(perm && "Got nullptr for permutation"); - assert(shape && "Got nullptr for shape"); - assert(rank == dimSizes.size() && "Rank mismatch"); - for (uint64_t d = 0; d < rank; ++d) - assert((shape[d] == 0 || shape[d] == dimSizes[perm[d]]) && - "Dimension size mismatch"); -} diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -53,8 +53,10 @@ #include "mlir/ExecutionEngine/SparseTensor/COO.h" #include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" #include "mlir/ExecutionEngine/SparseTensor/File.h" +#include "mlir/ExecutionEngine/SparseTensor/PermutationRef.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" +#include #include using namespace mlir::sparse_tensor; @@ -107,51 +109,62 @@ }; /// Initializes sparse tensor from an external COO-flavored format. +/// The `rank` argument is both dimension-rank and level-rank, and the +/// `dim2lvl` argument must be a permutation. /// Used by `IMPL_CONVERTTOMLIRSPARSETENSOR`. +// +// TODO: This is only used by PyTACO, since MLIR itself will inline this +// construct away. Since this cannot be easily generalized to support +// non-permutations, we should move it off to a new PyTACO-specific +// runtime library. +// // TODO: generalize beyond 64-bit indices. template static SparseTensorStorage * -toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *shape, - const V *values, const uint64_t *indices, - const uint64_t *perm, const DimLevelType *sparsity) { +toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *dimSizes, + const V *values, const uint64_t *dimIndices, + const uint64_t *dim2lvl, const DimLevelType *lvlTypes) { #ifndef NDEBUG - // Verify that perm is a permutation of 0..(rank-1). - std::vector seen(rank, false); - for (uint64_t i = 0; i < rank; ++i) { - const uint64_t j = perm[i]; - if (j >= rank || seen[j]) - MLIR_SPARSETENSOR_FATAL("Not a permutation of 0..%" PRIu64 "\n", rank); - seen[j] = true; - } - // Verify that the sparsity values are supported. // TODO: update this check to match what we actually support. for (uint64_t i = 0; i < rank; ++i) - if (sparsity[i] != DimLevelType::Dense && - sparsity[i] != DimLevelType::Compressed) - MLIR_SPARSETENSOR_FATAL("unsupported dimension level type: %d\n", - static_cast(sparsity[i])); + if (lvlTypes[i] != DimLevelType::Dense && + lvlTypes[i] != DimLevelType::Compressed) + MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n", + static_cast(lvlTypes[i])); #endif - + // Verify that `dim2lvl` is a permutation of `[0..(rank-1)]`. + // NOTE: The construction of `lvlSizes` and `lvl2dim` don't generalize + // to arbitrary `dim2lvl` mappings. Whereas constructing `lvlInd` from + // `dimInd` does (though the details would have to be updated, just + // like for `IMPL_ADDELT`). + detail::PermutationRef d2l(rank, dim2lvl); // Convert external format to internal COO. 
- auto *coo = SparseTensorCOO::newSparseTensorCOO(rank, shape, perm, nse); - std::vector idx(rank); - for (uint64_t i = 0, base = 0; i < nse; i++) { - for (uint64_t r = 0; r < rank; r++) - idx[perm[r]] = indices[base + r]; - coo->add(idx, values[i]); - base += rank; + auto lvlSizes = d2l.pushforward(rank, dimSizes); + auto *lvlCOO = new SparseTensorCOO(lvlSizes, nse); + std::vector lvlInd(rank); + const uint64_t *dimInd = dimIndices; + for (uint64_t i = 0; i < nse; ++i) { + d2l.pushforward(rank, dimInd, lvlInd.data()); + lvlCOO->add(lvlInd, values[i]); + dimInd += rank; } // Return sparse tensor storage format as opaque pointer. - auto *tensor = SparseTensorStorage::newSparseTensor( - rank, shape, perm, sparsity, coo); - delete coo; + auto lvl2dim = d2l.inverse(); + auto *tensor = SparseTensorStorage::newFromCOO( + rank, dimSizes, rank, lvlTypes, lvl2dim.data(), *lvlCOO); + delete lvlCOO; return tensor; } /// Converts a sparse tensor to an external COO-flavored format. /// Used by `IMPL_CONVERTFROMMLIRSPARSETENSOR`. // +// TODO: This is only used by PyTACO (and one place in the python +// integration tests), since MLIR itself will inline this construct away. +// Since this cannot be easily generalized to support non-permutations, +// we should move it off to a new PyTACO-specific runtime library. +// // TODO: Currently, values are copied from SparseTensorStorage to // SparseTensorCOO, then to the output. We may want to reduce the number // of copies. @@ -164,30 +177,34 @@ uint64_t *pRank, uint64_t *pNse, uint64_t **pShape, V **pValues, uint64_t **pIndices) { assert(tensor && "Received nullptr for tensor"); - uint64_t rank = tensor->getRank(); - std::vector perm(rank); - std::iota(perm.begin(), perm.end(), 0); - SparseTensorCOO *coo = tensor->toCOO(perm.data()); + uint64_t dimRank = tensor->getDimRank(); + const auto &dimSizes = tensor->getDimSizes(); + std::vector identityPerm(dimRank); + std::iota(identityPerm.begin(), identityPerm.end(), 0); + SparseTensorCOO *coo = + tensor->toCOO(dimRank, dimSizes.data(), dimRank, identityPerm.data()); const std::vector> &elements = coo->getElements(); uint64_t nse = elements.size(); - uint64_t *shape = new uint64_t[rank]; - for (uint64_t i = 0; i < rank; i++) - shape[i] = coo->getDimSizes()[i]; + const auto &cooSizes = coo->getDimSizes(); + assert(cooSizes.size() == dimRank && "Rank mismatch"); + uint64_t *shape = new uint64_t[dimRank]; + std::memcpy((void *)shape, (void *)cooSizes.data(), + sizeof(uint64_t) * dimRank); V *values = new V[nse]; - uint64_t *indices = new uint64_t[rank * nse]; + uint64_t *indices = new uint64_t[dimRank * nse]; - for (uint64_t i = 0, base = 0; i < nse; i++) { + for (uint64_t i = 0, base = 0; i < nse; ++i) { values[i] = elements[i].value; - for (uint64_t j = 0; j < rank; j++) - indices[base + j] = elements[i].indices[j]; - base += rank; + for (uint64_t d = 0; d < dimRank; ++d) + indices[base + d] = elements[i].indices[d]; + base += dimRank; } delete coo; - *pRank = rank; + *pRank = dimRank; *pNse = nse; *pShape = shape; *pValues = values; @@ -207,36 +224,44 @@ #define CASE(p, i, v, P, I, V) \ if (ptrTp == (p) && indTp == (i) && valTp == (v)) { \ - SparseTensorCOO *coo = nullptr; \ - if (action <= Action::kFromCOO) { \ - if (action == Action::kFromFile) { \ - char *filename = static_cast(ptr); \ - coo = openSparseTensorCOO(filename, rank, shape, perm, v); \ - } else if (action == Action::kFromCOO) { \ - coo = static_cast *>(ptr); \ - } else { \ - assert(action == Action::kEmpty); \ - } \ - auto *tensor = 
SparseTensorStorage::newSparseTensor( \ - rank, shape, perm, sparsity, coo); \ - if (action == Action::kFromFile) \ - delete coo; \ - return tensor; \ + switch (action) { \ + case Action::kEmpty: \ + return SparseTensorStorage::newEmpty( \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, lvl2dim); \ + case Action::kFromFile: { \ + char *filename = static_cast(ptr); \ + return openSparseTensor(dimRank, dimSizes, lvlRank, lvlTypes, \ + lvl2dim, dim2lvl, filename, v); \ } \ - if (action == Action::kSparseToSparse) { \ - auto *tensor = static_cast(ptr); \ - return SparseTensorStorage::newSparseTensor(rank, shape, perm, \ - sparsity, tensor); \ + case Action::kFromCOO: { \ + assert(ptr && "Received nullptr for SparseTensorCOO object"); \ + auto &coo = *static_cast *>(ptr); \ + return SparseTensorStorage::newFromCOO( \ + dimRank, dimSizes, lvlRank, lvlTypes, lvl2dim, coo); \ } \ - if (action == Action::kEmptyCOO) \ - return SparseTensorCOO::newSparseTensorCOO(rank, shape, perm); \ - coo = static_cast *>(ptr)->toCOO(perm); \ - if (action == Action::kToIterator) { \ + case Action::kSparseToSparse: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast(ptr); \ + return SparseTensorStorage::newFromSparseTensor( \ + dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, lvl2dim, dimRank, \ + dim2lvl, tensor); \ + } \ + case Action::kEmptyCOO: \ + return new SparseTensorCOO(lvlRank, lvlSizes); \ + case Action::kToCOO: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast *>(ptr); \ + return tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl); \ + } \ + case Action::kToIterator: { \ + assert(ptr && "Received nullptr for SparseTensorStorage object"); \ + auto &tensor = *static_cast *>(ptr); \ + auto *coo = tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl); \ return new SparseTensorIterator(coo); \ - } else { \ - assert(action == Action::kToCOO); \ } \ - return coo; \ + } \ + MLIR_SPARSETENSOR_FATAL("unknown action: %d\n", \ + static_cast(action)); \ } #define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) @@ -247,20 +272,32 @@ static_assert(std::is_same::value, "Expected index_type == uint64_t"); -void * -_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT - StridedMemRefType *sref, - StridedMemRefType *pref, - OverheadType ptrTp, OverheadType indTp, - PrimaryType valTp, Action action, void *ptr) { - assert(aref && sref && pref); - assert(aref->strides[0] == 1 && sref->strides[0] == 1 && - pref->strides[0] == 1); - assert(aref->sizes[0] == sref->sizes[0] && sref->sizes[0] == pref->sizes[0]); - const DimLevelType *sparsity = aref->data + aref->offset; - const index_type *shape = sref->data + sref->offset; - const index_type *perm = pref->data + pref->offset; - uint64_t rank = aref->sizes[0]; +// TODO: this swiss-army-knife should be split up into separate functions +// for each action, since the various actions don't agree on (1) whether +// the first two arguments are "sizes" vs "shapes", (2) whether the "lvl" +// arguments are actually storage-levels vs target tensor-dimensions, +// (3) whether all the arguments are actually used/required. 
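The TODO above concerns what the buffer arguments of `_mlir_ciface_newSparseTensor` mean. For the permutation-only case that the conversion pass currently emits (see the `NewCallParams` changes earlier in this patch), the five buffers are expected to satisfy the invariants sketched below; this checker is illustrative only and not part of the runtime.

#include <cstdint>

// Illustrative-only invariant check for the permutation case: `lvl2dim` and
// `dim2lvl` are mutually inverse, and the level-sizes are the permuted
// dimension-sizes.
static bool isConsistentPermutedMapping(uint64_t dimRank,
                                        const uint64_t *dimSizes,
                                        uint64_t lvlRank,
                                        const uint64_t *lvlSizes,
                                        const uint64_t *lvl2dim,
                                        const uint64_t *dim2lvl) {
  if (dimRank != lvlRank) // a permutation requires equal ranks
    return false;
  for (uint64_t l = 0; l < lvlRank; ++l) {
    const uint64_t d = lvl2dim[l];
    if (d >= dimRank || dim2lvl[d] != l || lvlSizes[l] != dimSizes[d])
      return false;
  }
  return true;
}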
+void *_mlir_ciface_newSparseTensor( // NOLINT + StridedMemRefType *dimSizesRef, + StridedMemRefType *lvlSizesRef, + StridedMemRefType *lvlTypesRef, + StridedMemRefType *lvl2dimRef, + StridedMemRefType *dim2lvlRef, OverheadType ptrTp, + OverheadType indTp, PrimaryType valTp, Action action, void *ptr) { + assert(dimSizesRef && dimSizesRef->strides[0] == 1); + assert(lvlSizesRef && lvlSizesRef->strides[0] == 1); + assert(lvlTypesRef && lvlTypesRef->strides[0] == 1); + assert(lvl2dimRef && lvl2dimRef->strides[0] == 1); + assert(dim2lvlRef && dim2lvlRef->strides[0] == 1); + const uint64_t dimRank = dimSizesRef->sizes[0]; + const uint64_t lvlRank = lvlSizesRef->sizes[0]; + assert(dim2lvlRef->sizes[0] == dimRank); + assert(lvlTypesRef->sizes[0] == lvlRank && lvl2dimRef->sizes[0] == lvlRank); + const index_type *dimSizes = dimSizesRef->data + dimSizesRef->offset; + const index_type *lvlSizes = lvlSizesRef->data + lvlSizesRef->offset; + const DimLevelType *lvlTypes = lvlTypesRef->data + lvlTypesRef->offset; + const index_type *lvl2dim = lvl2dimRef->data + lvl2dimRef->offset; + const index_type *dim2lvl = dim2lvlRef->data + dim2lvlRef->offset; // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. // This is safe because of the static_assert above. @@ -415,22 +452,26 @@ #undef IMPL_SPARSEINDICES #undef IMPL_GETOVERHEAD +// TODO: while this API design will work for arbitrary dim2lvl mappings, +// we should probably move the `dimInd`-to-`lvlInd` computation into codegen +// (since that could enable optimizations to remove the intermediate memref). #define IMPL_ADDELT(VNAME, V) \ - void *_mlir_ciface_addElt##VNAME(void *coo, StridedMemRefType *vref, \ - StridedMemRefType *iref, \ - StridedMemRefType *pref) { \ - assert(coo &&vref &&iref &&pref); \ - assert(iref->strides[0] == 1 && pref->strides[0] == 1); \ - assert(iref->sizes[0] == pref->sizes[0]); \ - const index_type *indx = iref->data + iref->offset; \ - const index_type *perm = pref->data + pref->offset; \ - uint64_t isize = iref->sizes[0]; \ - std::vector indices(isize); \ - for (uint64_t r = 0; r < isize; r++) \ - indices[perm[r]] = indx[r]; \ + void *_mlir_ciface_addElt##VNAME( \ + void *lvlCOO, StridedMemRefType *vref, \ + StridedMemRefType *dimIndRef, \ + StridedMemRefType *dim2lvlRef) { \ + assert(lvlCOO &&vref &&dimIndRef &&dim2lvlRef); \ + assert(dimIndRef->strides[0] == 1 && dim2lvlRef->strides[0] == 1); \ + const uint64_t rank = dimIndRef->sizes[0]; \ + assert(dim2lvlRef->sizes[0] == rank); \ + const index_type *dimInd = dimIndRef->data + dimIndRef->offset; \ + const index_type *dim2lvl = dim2lvlRef->data + dim2lvlRef->offset; \ + std::vector lvlInd(rank); \ + for (uint64_t d = 0; d < rank; ++d) \ + lvlInd[dim2lvl[d]] = dimInd[d]; \ V *value = vref->data + vref->offset; \ - static_cast *>(coo)->add(indices, *value); \ - return coo; \ + static_cast *>(lvlCOO)->add(lvlInd, *value); \ + return lvlCOO; \ } MLIR_SPARSETENSOR_FOREVERY_V(IMPL_ADDELT) #undef IMPL_ADDELT @@ -498,8 +539,8 @@ // //===----------------------------------------------------------------------===// -index_type sparseDimSize(void *tensor, index_type d) { - return static_cast(tensor)->getDimSize(d); +index_type sparseLvlSize(void *tensor, index_type x) { + return static_cast(tensor)->getLvlSize(x); } void endInsert(void *tensor) { diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -40,7 +40,7 
@@ // CHECK-LABEL: func @sparse_dim1d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[D:.*]] = call @sparseDimSize(%[[A]], %[[C]]) +// CHECK: %[[D:.*]] = call @sparseLvlSize(%[[A]], %[[C]]) // CHECK: return %[[D]] : index func.func @sparse_dim1d(%arg0: tensor) -> index { %c = arith.constant 0 : index @@ -51,7 +51,7 @@ // CHECK-LABEL: func @sparse_dim3d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 2 : index -// CHECK: %[[D:.*]] = call @sparseDimSize(%[[A]], %[[C]]) +// CHECK: %[[D:.*]] = call @sparseLvlSize(%[[A]], %[[C]]) // CHECK: return %[[D]] : index func.func @sparse_dim3d(%arg0: tensor) -> index { // Querying for dimension 1 in the tensor type needs to be @@ -78,13 +78,15 @@ // CHECK-LABEL: func @sparse_new1d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[FromFile:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<1xi8> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<1xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor<128xf64, #SparseVector> @@ -94,13 +96,15 @@ // CHECK-LABEL: func @sparse_new2d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[FromFile:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] 
= memref.cast %[[LvlTypes0]] : memref<2xi8> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor @@ -110,13 +114,17 @@ // CHECK-LABEL: func @sparse_new3d( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[FromFile:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<3xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<3xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<3xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[Lvl2Dim0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Dim2Lvl0:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<3xi8> to memref +// CHECK-DAG: %[[Lvl2Dim:.*]] = memref.cast %[[Lvl2Dim0]] : memref<3xindex> to memref +// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.cast %[[Dim2Lvl0]] : memref<3xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Lvl2Dim]], %[[Dim2Lvl]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromFile]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor @@ -129,16 +137,18 @@ // CHECK-DAG: %[[Empty:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I]], %[[Q]][%[[C0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[J]], %[[Q]][%[[C1]]] : memref<2xindex> +// CHECK-DAG: %[[DimSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypes0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[Iota0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizes:.*]] = memref.cast %[[DimSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.cast %[[LvlTypes0]] : memref<2xi8> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref +// CHECK-DAG: 
memref.store %[[I]], %[[DimSizes0]][%[[C0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[J]], %[[DimSizes0]][%[[C1]]] : memref<2xindex> // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[Empty]], %[[NP]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[Empty]], %[[NP]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_init(%arg0: index, %arg1: index) -> tensor { %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor @@ -350,7 +360,7 @@ // CHECK-LABEL: func @sparse_expansion3( // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[N:.*]] = call @newSparseTensor -// CHECK: %[[S:.*]] = call @sparseDimSize(%[[N]], %c1) : (!llvm.ptr, index) -> index +// CHECK: %[[S:.*]] = call @sparseLvlSize(%[[N]], %c1) : (!llvm.ptr, index) -> index // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref @@ -396,7 +406,7 @@ // CHECK-SAME: %[[B:.*]]: !llvm.ptr) // CHECK-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 // CHECK-DAG: %[[Sort:.*]] = arith.constant false -// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) // CHECK: call @outSparseTensorF64(%[[COO]], %[[B]], %[[Sort]]) : (!llvm.ptr, !llvm.ptr, i1) -> () // CHECK: call @delSparseTensorCOOF64(%[[COO]]) // CHECK: return @@ -410,7 +420,7 @@ // CHECK-SAME: %[[B:.*]]: !llvm.ptr) // CHECK-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 // CHECK-DAG: %[[Sort:.*]] = arith.constant true -// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK: %[[COO:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) // CHECK: call @outSparseTensorF32(%[[COO]], %[[B]], %[[Sort]]) : (!llvm.ptr, !llvm.ptr, i1) -> () // CHECK: call @delSparseTensorCOOF32(%[[COO]]) // CHECK: return diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -23,14 +23,16 @@ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[U:.*]] = tensor.dim %[[A]], %[[C0]] : tensor -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// 
CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<1xindex> // CHECK: %[[T:.*]] = memref.cast %[[M]] : memref<1xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -40,10 +42,10 @@ // CHECK: scf.if %[[N]] { // CHECK: memref.store %[[I]], %[[M]][%[[C0]]] : memref<1xindex> // CHECK: memref.store %[[E]], %[[BUF]][] : memref -// CHECK: call @addEltI32(%[[C]], %[[BUF]], %[[T]], %[[Z]]) +// CHECK: call @addEltI32(%[[C]], %[[BUF]], %[[T]], %[[IotaP]]) // CHECK: } // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOI32(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_convert_1d(%arg0: tensor) -> tensor { @@ -79,14 +81,16 @@ // CHECK-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[T:.*]] = memref.cast %[[M]] : memref<2xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -96,10 +100,10 @@ // CHECK: memref.store %[[I]], %[[M]][%[[C0]]] : memref<2xindex> // CHECK: memref.store %[[J]], %[[M]][%[[C1]]] : memref<2xindex> // CHECK: memref.store %[[E]], %[[BUF]][] : memref -// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[T]], %[[Z]]) +// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[T]], %[[IotaP]]) // CHECK: } // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, 
%{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOF64(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr @@ -147,14 +151,16 @@ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<2xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<2xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[N:.*]] = memref.cast %[[M]] : memref<2xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -163,9 +169,9 @@ // CHECK-DAG: memref.store %{{.*}}, %[[M]][%[[C1]]] : memref<2xindex> // CHECK-DAG: %[[V:.*]] = tensor.extract %{{.*}}[%[[I]]] : tensor<2xf32> // CHECK: memref.store %[[V]], %[[BUF]][] : memref -// CHECK: call @addEltF32(%{{.*}}, %[[BUF]], %[[N]], %{{.*}}) +// CHECK: call @addEltF32(%{{.*}}, %[[BUF]], %[[N]], %[[IotaP]]) // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOF32(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr @@ -218,14 +224,18 @@ // CHECK-DAG: %[[U1:.*]] = tensor.dim %[[A]], %[[C0]] : tensor // CHECK-DAG: %[[U2:.*]] = tensor.dim %[[A]], %[[C1]] : tensor // CHECK-DAG: %[[U3:.*]] = tensor.dim %[[A]], %[[C2]] : tensor -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<3xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<3xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Lvl2Dim:.*]] = 
memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[Dim2Lvl:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<3xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<3xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<3xindex> to memref +// CHECK-DAG: %[[Lvl2DimP:.*]] = memref.cast %[[Lvl2Dim]] : memref<3xindex> to memref +// CHECK-DAG: %[[Dim2LvlP:.*]] = memref.cast %[[Dim2Lvl]] : memref<3xindex> to memref // CHECK: %[[NP:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) +// CHECK: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[Lvl2DimP]], %[[Dim2LvlP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]]) // CHECK: %[[M:.*]] = memref.alloca() : memref<3xindex> // CHECK: %[[N:.*]] = memref.cast %[[M]] : memref<3xindex> to memref // CHECK: %[[BUF:.*]] = memref.alloca() : memref @@ -237,11 +247,11 @@ // CHECK: memref.store %[[J]], %[[M]][%[[C1]]] : memref<3xindex> // CHECK: memref.store %[[K]], %[[M]][%[[C2]]] : memref<3xindex> // CHECK: memref.store %[[E]], %[[BUF]][] : memref -// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[N]], %[[Z]]) +// CHECK: call @addEltF64(%[[C]], %[[BUF]], %[[N]], %[[Dim2LvlP]]) // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[Lvl2DimP]], %[[Dim2LvlP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK: call @delSparseTensorCOOF64(%[[C]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_convert_3d(%arg0: tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -20,24 +20,28 @@ // CHECK-SAME: %[[Arg:.*]]: !llvm.ptr) -> tensor<13xi32> // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I13:.*]] = arith.constant 13 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<1xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<1xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<1xindex> to memref -// CHECK-DAG: memref.store %[[I13]], %[[SizesS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[zeroI32:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[ElemTpActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[zeroI32]], %[[zeroI32]], %[[ElemTpActionToIter]], %[[ElemTpActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : i32 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : 
memref<1xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: memref.store %[[I13]], %[[DimSizes]][%[[I0]]] : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<1xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[C0]], %[[C0]], %[[C6]], %[[C6]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32> -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref<13xi32>) +// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref<13xi32>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -57,25 +61,29 @@ // CHECK-LABEL: func @sparse_convert_1d_dyn( // CHECK-SAME: %[[Arg:.*]]: !llvm.ptr) -> tensor // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<1xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<1xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<1xindex> to memref -// CHECK-DAG: %[[SizeI0:.*]] = call @sparseDimSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[SizeI0]], %[[SizesS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[zeroI32:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[ElemTpActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[zeroI32]], %[[zeroI32]], %[[ElemTpActionToIter]], %[[ElemTpActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : i32 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[SizeI0:.*]] = call @sparseLvlSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: 
memref.store %[[SizeI0]], %[[DimSizes]][%[[I0]]] : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<1xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[C0]], %[[C0]], %[[C6]], %[[C6]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -98,26 +106,30 @@ // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I2]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I4]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: 
%[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -153,27 +165,31 @@ // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: %[[SizeI0:.*]] = call @sparseDimSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[SizeI0]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[SizeI0:.*]] = call @sparseLvlSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[SizeI0]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I4]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> 
to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -197,27 +213,31 @@ // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: %[[SizeI1:.*]] = call @sparseDimSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[SizeI1]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[SizeI1:.*]] = call @sparseLvlSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[I2]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[SizeI1]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], 
%[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -240,28 +260,32 @@ // CHECK-SAME: %[[Arg:.*]]: !llvm.ptr) -> tensor // CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<2xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<2xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref -// CHECK-DAG: %[[SizeI0:.*]] = call @sparseDimSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: %[[SizeI1:.*]] = call @sparseDimSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index -// CHECK-DAG: memref.store %[[SizeI0]], %[[SizesS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[SizeI1]], %[[SizesS]][%[[I1]]] : memref<2xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[SizeI0:.*]] = call @sparseLvlSize(%[[Arg]], %[[I0]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: %[[SizeI1:.*]] = call @sparseLvlSize(%[[Arg]], %[[I1]]) : (!llvm.ptr, index) -> index +// CHECK-DAG: memref.store %[[SizeI0]], %[[DimSizes]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[SizeI1]], %[[DimSizes]][%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: 
%[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<2xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 @@ -303,29 +327,33 @@ // CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[I3:.*]] = arith.constant 3 : index // CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<3xi8> -// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<3xi8> to memref // CHECK-DAG: %[[DenseDLT:.*]] = arith.constant 4 : i8 -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I0]]] : memref<3xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I1]]] : memref<3xi8> -// CHECK-DAG: memref.store %[[DenseDLT]], %[[AttrsS]][%[[I2]]] : memref<3xi8> -// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<3xindex> to memref -// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I3]], %[[SizesS]][%[[I1]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I2]]] : memref<3xindex> -// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<3xindex> -// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<3xindex> to memref -// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<3xindex> -// CHECK-DAG: memref.store %[[I2]], %[[PermS]][%[[I2]]] : memref<3xindex> // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 6 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 +// +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<3xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<3xi8> to memref +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I0]]] : memref<3xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I1]]] : memref<3xi8> +// CHECK-DAG: memref.store %[[DenseDLT]], %[[LvlTypes]][%[[I2]]] : memref<3xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<3xindex> to memref +// CHECK-DAG: memref.store %[[I2]], %[[DimSizes]][%[[I0]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I3]], %[[DimSizes]][%[[I1]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I4]], %[[DimSizes]][%[[I2]]] : memref<3xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = 
memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<3xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<3xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]][%[[I0]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]][%[[I1]]] : memref<3xindex> +// CHECK-DAG: memref.store %[[I2]], %[[Iota]][%[[I2]]] : memref<3xindex> +// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) +// // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<3xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<3xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -44,13 +44,15 @@ // CHECK-LABEL: func @sparse_convert_1d_ss( // CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32 -// CHECK-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor @@ -59,28 +61,33 @@ // CHECK-COO-LABEL: func @sparse_convert( // CHECK-COO-SAME: %[[A:.*]]: !llvm.ptr) -// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 -// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 -// CHECK-COO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-COO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[X:.*]] = 
memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-COO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-COO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) -// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 +// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 +// CHECK-COO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-COO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-COO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK-COO: call @delSparseTensorCOOF32(%[[C]]) // CHECK-COO: return %[[T]] : !llvm.ptr +// // CHECK-AUTO-LABEL: func @sparse_convert( // CHECK-AUTO-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-AUTO-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32 -// CHECK-AUTO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-AUTO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-AUTO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-AUTO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) +// CHECK-AUTO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-AUTO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-AUTO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) // CHECK-AUTO: return %[[T]] : !llvm.ptr // CHECK-RWT-LABEL: func.func @sparse_convert( @@ -121,28 +128,33 @@ // CHECK-COO-LABEL: func @sparse_convert_singleton( // CHECK-COO-SAME: %[[A:.*]]: !llvm.ptr) -// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 -// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 -// CHECK-COO-DAG: %[[P:.*]] = memref.alloca() : 
memref<1xi8> -// CHECK-COO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-COO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-COO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-COO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) -// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) +// CHECK-COO-DAG: %[[ToCOO:.*]] = arith.constant 5 : i32 +// CHECK-COO-DAG: %[[FromCOO:.*]] = arith.constant 2 : i32 +// CHECK-COO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-COO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-COO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-COO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-COO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-COO: %[[C:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ToCOO]], %[[A]]) +// CHECK-COO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]]) // CHECK-COO: call @delSparseTensorCOOF32(%[[C]]) // CHECK-COO: return %[[T]] : !llvm.ptr +// // CHECK-AUTO-LABEL: func @sparse_convert_singleton( // CHECK-AUTO-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-AUTO-DAG: %[[SparseToSparse:.*]] = arith.constant 3 : i32 -// CHECK-AUTO-DAG: %[[P:.*]] = memref.alloca() : memref<1xi8> -// CHECK-AUTO-DAG: %[[Q:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[R:.*]] = memref.alloca() : memref<1xindex> -// CHECK-AUTO-DAG: %[[X:.*]] = memref.cast %[[P]] : memref<1xi8> to memref -// CHECK-AUTO-DAG: %[[Y:.*]] = memref.cast %[[Q]] : memref<1xindex> to memref -// CHECK-AUTO-DAG: %[[Z:.*]] = memref.cast %[[R]] : memref<1xindex> to memref -// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) +// CHECK-AUTO-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<1xi8> +// CHECK-AUTO-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[Iota:.*]] = memref.alloca() : memref<1xindex> +// CHECK-AUTO-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<1xi8> to memref +// CHECK-AUTO-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<1xindex> to memref +// CHECK-AUTO-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<1xindex> to memref +// CHECK-AUTO: %[[T:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %{{.*}}, %{{.*}}, %{{.*}}, %[[SparseToSparse]], %[[A]]) // CHECK-AUTO: return %[[T]] : !llvm.ptr func.func @sparse_convert_singleton(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor diff --git 
a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -31,19 +31,21 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref // CHECK: %[[TMP_10:.*]] = memref.alloca() : memref @@ -84,20 +86,22 @@ // CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[TMP_c8_i8:.*]] = arith.constant 8 : i8 -// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref -// CHECK: memref.store 
%[[TMP_c5]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_0:.*]] = memref.cast %[[LvlTypes_0]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_0:.*]] = memref.cast %[[DimSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c5]], %[[DimSizes_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_0:.*]] = memref.cast %[[LvlSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP_0:.*]] = memref.cast %[[Iota_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[NullPtr:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[IotaP_0]], %[[IotaP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[NullPtr]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref // CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref @@ -109,23 +113,25 @@ // CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 // CHECK: scf.if %[[TMP_23]] { // CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref -// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[IotaP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_16:.*]] = 
memref.cast %[[TMP_15]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_1:.*]] = memref.cast %[[LvlTypes_1]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_1:.*]] = memref.cast %[[DimSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_1:.*]] = memref.cast %[[LvlSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP_1:.*]] = memref.cast %[[Iota_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[DimSizesP_1]], %[[LvlSizesP_1]], %[[LvlTypesP_1]], %[[IotaP_1]], %[[IotaP_1]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref // CHECK: %[[TMP_20:.*]] = memref.alloca() : memref @@ -138,11 +144,11 @@ // CHECK: %[[TMP_24:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex> // CHECK: memref.store %[[TMP_23]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> // CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[IotaP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: scf.yield // CHECK: } // CHECK: call @delSparseTensorIteratorF64(%[[TMP_17]]) : (!llvm.ptr) -> () -// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[IotaP_0]], %[[IotaP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) // CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () // CHECK: return %[[TMP_21]] : !llvm.ptr // CHECK: } @@ -168,20 +174,24 @@ // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[TMP_c8_i8:.*]] = arith.constant 8 : i8 -// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_1:.*]] = memref.cast 
%[[TMP_0]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_0:.*]] = memref.cast %[[LvlTypes_0]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_0:.*]] = memref.cast %[[DimSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c5]], %[[DimSizes_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_0:.*]] = memref.cast %[[LvlSizes_0]] : memref<2xindex> to memref +// CHECK-DAG: %[[Lvl2Dim_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Lvl2DimP_0:.*]] = memref.cast %[[Lvl2Dim_0]] : memref<2xindex> to memref +// CHECK-DAG: %[[Dim2Lvl_0:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[Dim2LvlP_0:.*]] = memref.cast %[[Dim2Lvl_0]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Dim2Lvl_0]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Dim2Lvl_0]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[NullPtr:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], %[[LvlTypesP_0]], %[[Lvl2DimP_0]], %[[Dim2LvlP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[NullPtr]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref // CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref @@ -193,23 +203,25 @@ // CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 // CHECK: scf.if %[[TMP_23]] { // CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref -// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[Dim2LvlP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref -// CHECK: 
memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP_1:.*]] = memref.cast %[[LvlTypes_1]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP_1:.*]] = memref.cast %[[DimSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP_1:.*]] = memref.cast %[[LvlSizes_1]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota_1:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP_1:.*]] = memref.cast %[[Iota_1]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota_1]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota_1]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[DimSizesP_1]], %[[LvlSizesP_1]], %[[LvlTypesP_1]], %[[IotaP_1]], %[[IotaP_1]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref // CHECK: %[[TMP_20:.*]] = memref.alloca() : memref @@ -222,11 +234,11 @@ // CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index // CHECK: memref.store %[[TMP_22]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> // CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[Dim2LvlP_0]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr // CHECK: scf.yield // CHECK: } // CHECK: call @delSparseTensorIteratorF64(%[[TMP_17]]) : (!llvm.ptr) -> () -// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[DimSizesP_0]], %[[LvlSizesP_0]], 
%[[LvlTypesP_0]], %[[Lvl2DimP_0]], %[[Dim2LvlP_0]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) // CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () // CHECK: return %[[TMP_21]] : !llvm.ptr // CHECK: } @@ -260,19 +272,21 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c4]], %[[DimSizes]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref // CHECK: %[[TMP_10:.*]] = memref.alloca() : memref @@ -321,19 +335,21 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xi8> to memref -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xi8> -// CHECK: memref.store %[[TMP_c8_i8]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xi8> -// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c3]], 
%[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_6:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[TMP_7:.*]] = memref.cast %[[TMP_6]] : memref<2xindex> to memref -// CHECK: memref.store %[[TMP_c0]], %[[TMP_6]][%[[TMP_c0]]] : memref<2xindex> -// CHECK: memref.store %[[TMP_c1]], %[[TMP_6]][%[[TMP_c1]]] : memref<2xindex> -// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[TMP_3]], %[[TMP_5]], %[[TMP_7]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[TMP_c8_i8]], %[[LvlTypes]][%[[TMP_c1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c3]], %[[DimSizes]][%[[TMP_c1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[TMP_c0]], %[[Iota]][%[[TMP_c0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[TMP_c1]], %[[Iota]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) // CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref // CHECK: %[[TMP_11:.*]] = memref.alloca() : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -47,9 +47,9 @@ // // CHECK-CONVERT-LABEL: func @kernel( // CHECK-CONVERT: %[[C:.*]] = arith.constant 0 : index -// CHECK-CONVERT: %{{.*}} = call @sparseDimSize +// CHECK-CONVERT: %{{.*}} = call @sparseLvlSize // CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor -// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]]) +// CHECK-CONVERT: %[[S:.*]] = call @sparseLvlSize(%[[N]], %[[C]]) // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -3,60 +3,64 @@ #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> // CHECK-LABEL: func.func @fill_zero_after_alloc( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-SAME: %[[VAL_1:.*]]: !llvm.ptr) -> !llvm.ptr { -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : 
i32 -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant false -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 100 : index -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 300 : index -// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 8 : i8 -// CHECK: %[[VAL_12:.*]] = memref.alloca() : memref<2xi8> -// CHECK: %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref -// CHECK: memref.store %[[VAL_11]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8> -// CHECK: memref.store %[[VAL_11]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xi8> -// CHECK: %[[VAL_14:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_9]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_16:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_6]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_15]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_4]], %[[VAL_18]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-SAME: %[[Arg0:.*]]: !llvm.ptr, +// CHECK-SAME: %[[Arg1:.*]]: !llvm.ptr) -> !llvm.ptr { +// CHECK-DAG: %[[F0:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[False:.*]] = arith.constant false +// CHECK-DAG: %[[True:.*]] = arith.constant true +// CHECK-DAG: %[[I100:.*]] = arith.constant 100 : index +// CHECK-DAG: %[[I300:.*]] = arith.constant 300 : index +// CHECK-DAG: %[[CompressedDLT:.*]] = arith.constant 8 : i8 +// CHECK-DAG: %[[LvlTypes:.*]] = memref.alloca() : memref<2xi8> +// CHECK-DAG: %[[LvlTypesP:.*]] = memref.cast %[[LvlTypes]] : memref<2xi8> to memref +// CHECK-DAG: memref.store %[[CompressedDLT]], %[[LvlTypes]]{{\[}}%[[I0]]] : memref<2xi8> +// CHECK-DAG: memref.store %[[CompressedDLT]], %[[LvlTypes]]{{\[}}%[[I1]]] : memref<2xi8> +// CHECK-DAG: %[[DimSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[DimSizesP:.*]] = memref.cast %[[DimSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I100]], %[[DimSizes]]{{\[}}%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I300]], %[[DimSizes]]{{\[}}%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[LvlSizes:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[LvlSizesP:.*]] = memref.cast %[[LvlSizes]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I100]], %[[LvlSizes]]{{\[}}%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I300]], %[[LvlSizes]]{{\[}}%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[Iota:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[IotaP:.*]] = memref.cast %[[Iota]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[I0]], %[[Iota]]{{\[}}%[[I0]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[I1]], %[[Iota]]{{\[}}%[[I1]]] : memref<2xindex> +// CHECK-DAG: %[[NullPtr:.*]] = 
llvm.mlir.null : !llvm.ptr +// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[DimSizesP]], %[[LvlSizesP]], %[[LvlTypesP]], %[[IotaP]], %[[IotaP]], %[[C0]], %[[C0]], %[[C1]], %[[C0]], %[[NullPtr]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr // CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<300xf64> // CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<300xf64> to memref // CHECK: %[[VAL_22:.*]] = memref.alloc() : memref<300xi1> // CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<300xi1> to memref // CHECK: %[[VAL_24:.*]] = memref.alloc() : memref<300xindex> // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_20]] : memref<300xf64>) -// CHECK: linalg.fill ins(%[[VAL_7]] : i1) outs(%[[VAL_22]] : memref<300xi1>) -// CHECK: %[[VAL_26:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_27:.*]] = call @sparseIndices0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_28:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_29:.*]] = call @sparseIndices0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_30:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_31:.*]] = call @sparsePointers0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_32:.*]] = call @sparseIndices0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_33:.*]] = call @sparsePointers0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_34:.*]] = call @sparseIndices0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_35:.*]] = call @sparseValuesF64(%[[VAL_1]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_26]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_26]]{{\[}}%[[VAL_6]]] : memref -// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_36]] to %[[VAL_37]] step %[[VAL_6]] { +// CHECK: linalg.fill ins(%[[F0]] : f64) outs(%[[VAL_20]] : memref<300xf64>) +// CHECK: linalg.fill ins(%[[False]] : i1) outs(%[[VAL_22]] : memref<300xi1>) +// CHECK: %[[VAL_26:.*]] = call @sparsePointers0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_27:.*]] = call @sparseIndices0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_28:.*]] = call @sparsePointers0(%[[Arg0]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_29:.*]] = call @sparseIndices0(%[[Arg0]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_30:.*]] = call @sparseValuesF64(%[[Arg0]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_31:.*]] = call @sparsePointers0(%[[Arg1]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_32:.*]] = call @sparseIndices0(%[[Arg1]], %[[I0]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_33:.*]] = call @sparsePointers0(%[[Arg1]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_34:.*]] = call @sparseIndices0(%[[Arg1]], %[[I1]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_35:.*]] = call @sparseValuesF64(%[[Arg1]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_26]]{{\[}}%[[I0]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_26]]{{\[}}%[[I1]]] : memref +// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_36]] to %[[VAL_37]] step %[[I1]] { // CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_27]]{{\[}}%[[VAL_38]]] : memref // 
CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_28]]{{\[}}%[[VAL_38]]] : memref -// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_38]], %[[VAL_6]] : index +// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_38]], %[[I1]] : index // CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_28]]{{\[}}%[[VAL_41]]] : memref -// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_31]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_31]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_45:.*]]:3 = scf.while (%[[VAL_46:.*]] = %[[VAL_40]], %[[VAL_47:.*]] = %[[VAL_43]], %[[VAL_48:.*]] = %[[VAL_5]]) : (index, index, index) -> (index, index, index) { +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_31]]{{\[}}%[[I0]]] : memref +// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_31]]{{\[}}%[[I1]]] : memref +// CHECK: %[[VAL_45:.*]]:3 = scf.while (%[[VAL_46:.*]] = %[[VAL_40]], %[[VAL_47:.*]] = %[[VAL_43]], %[[VAL_48:.*]] = %[[I0]]) : (index, index, index) -> (index, index, index) { // CHECK: %[[VAL_49:.*]] = arith.cmpi ult, %[[VAL_46]], %[[VAL_42]] : index // CHECK: %[[VAL_50:.*]] = arith.cmpi ult, %[[VAL_47]], %[[VAL_44]] : index // CHECK: %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1 @@ -73,20 +77,20 @@ // CHECK: %[[VAL_62:.*]] = scf.if %[[VAL_61]] -> (index) { // CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_30]]{{\[}}%[[VAL_52]]] : memref // CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_53]]] : memref -// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_53]], %[[VAL_6]] : index +// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_53]], %[[I1]] : index // CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_33]]{{\[}}%[[VAL_65]]] : memref -// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_6]] iter_args(%[[VAL_69:.*]] = %[[VAL_54]]) -> (index) { +// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[I1]] iter_args(%[[VAL_69:.*]] = %[[VAL_54]]) -> (index) { // CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_34]]{{\[}}%[[VAL_68]]] : memref // CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_70]]] : memref<300xf64> // CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_35]]{{\[}}%[[VAL_68]]] : memref // CHECK: %[[VAL_73:.*]] = arith.mulf %[[VAL_63]], %[[VAL_72]] : f64 // CHECK: %[[VAL_74:.*]] = arith.addf %[[VAL_71]], %[[VAL_73]] : f64 // CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_22]]{{\[}}%[[VAL_70]]] : memref<300xi1> -// CHECK: %[[VAL_76:.*]] = arith.cmpi eq, %[[VAL_75]], %[[VAL_7]] : i1 +// CHECK: %[[VAL_76:.*]] = arith.cmpi eq, %[[VAL_75]], %[[False]] : i1 // CHECK: %[[VAL_77:.*]] = scf.if %[[VAL_76]] -> (index) { -// CHECK: memref.store %[[VAL_8]], %[[VAL_22]]{{\[}}%[[VAL_70]]] : memref<300xi1> +// CHECK: memref.store %[[True]], %[[VAL_22]]{{\[}}%[[VAL_70]]] : memref<300xi1> // CHECK: memref.store %[[VAL_70]], %[[VAL_24]]{{\[}}%[[VAL_69]]] : memref<300xindex> -// CHECK: %[[VAL_78:.*]] = arith.addi %[[VAL_69]], %[[VAL_6]] : index +// CHECK: %[[VAL_78:.*]] = arith.addi %[[VAL_69]], %[[I1]] : index // CHECK: scf.yield %[[VAL_78]] : index // CHECK: } else { // CHECK: scf.yield %[[VAL_69]] : index @@ -98,15 +102,15 @@ // CHECK: } else { // CHECK: scf.yield %[[VAL_54]] : index // CHECK: } -// CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_52]], %[[VAL_6]] : index +// CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_52]], %[[I1]] : index // CHECK: %[[VAL_82:.*]] = arith.select %[[VAL_59]], %[[VAL_81]], %[[VAL_52]] : index -// CHECK: %[[VAL_83:.*]] = arith.addi %[[VAL_53]], %[[VAL_6]] : index +// CHECK: %[[VAL_83:.*]] = arith.addi %[[VAL_53]], %[[I1]] : index // CHECK: 
%[[VAL_84:.*]] = arith.select %[[VAL_60]], %[[VAL_83]], %[[VAL_53]] : index // CHECK: scf.yield %[[VAL_82]], %[[VAL_84]], %[[VAL_85:.*]] : index, index, index // CHECK: } // CHECK: %[[VAL_86:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[VAL_87:.*]] = memref.cast %[[VAL_86]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_39]], %[[VAL_86]]{{\[}}%[[VAL_5]]] : memref<2xindex> +// CHECK: memref.store %[[VAL_39]], %[[VAL_86]]{{\[}}%[[I0]]] : memref<2xindex> // CHECK: func.call @expInsertF64(%[[VAL_19]], %[[VAL_87]], %[[VAL_21]], %[[VAL_23]], %[[VAL_25]], %[[VAL_88:.*]]#2) : (!llvm.ptr, memref, memref, memref, memref, index) -> () // CHECK: } // CHECK: memref.dealloc %[[VAL_20]] : memref<300xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -54,9 +54,9 @@ // CHECK-MIR-DAG: %[[VAL_2:.*]] = arith.constant 2 : index // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK-MIR-DAG: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index -// CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_3]]) : (!llvm.ptr, index) -> index -// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_2]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-DAG: %[[VAL_5:.*]] = call @sparseLvlSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparseLvlSize(%[[VAL_0]], %[[VAL_3]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseLvlSize(%[[VAL_0]], %[[VAL_2]]) : (!llvm.ptr, index) -> index // CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF32(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK-MIR: %[[VAL_11:.*]] = tensor.extract %[[VAL_1]][] : tensor diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6835,13 +6835,16 @@ srcs = [ "lib/ExecutionEngine/SparseTensor/File.cpp", "lib/ExecutionEngine/SparseTensor/NNZ.cpp", + "lib/ExecutionEngine/SparseTensor/PermutationRef.cpp", "lib/ExecutionEngine/SparseTensor/Storage.cpp", ], hdrs = [ + "include/mlir/ExecutionEngine/SparseTensor/Attributes.h", "include/mlir/ExecutionEngine/SparseTensor/COO.h", "include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h", "include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h", "include/mlir/ExecutionEngine/SparseTensor/File.h", + "include/mlir/ExecutionEngine/SparseTensor/PermutationRef.h", "include/mlir/ExecutionEngine/SparseTensor/Storage.h", ], includes = ["include"],