diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h @@ -36,8 +36,8 @@ /// (2) centralizes the memory reservation and (re)allocation to one place. template struct Element final { - Element(uint64_t *ind, V val) : indices(ind), value(val){}; - uint64_t *indices; // pointer into shared index pool + Element(const uint64_t *ind, V val) : indices(ind), value(val){}; + const uint64_t *indices; // pointer into shared index pool V value; }; @@ -75,7 +75,7 @@ const uint64_t *perm, uint64_t capacity = 0) { std::vector permsz(rank); - for (uint64_t r = 0; r < rank; r++) { + for (uint64_t r = 0; r < rank; ++r) { assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); permsz[perm[r]] = dimSizes[r]; } @@ -94,11 +94,11 @@ /// Adds element as indices and value. void add(const std::vector &ind, V val) { assert(!iteratorLocked && "Attempt to add() after startIterator()"); - uint64_t *base = indices.data(); + const uint64_t *base = indices.data(); uint64_t size = indices.size(); uint64_t rank = getRank(); assert(ind.size() == rank && "Element rank mismatch"); - for (uint64_t r = 0; r < rank; r++) { + for (uint64_t r = 0; r < rank; ++r) { assert(ind[r] < dimSizes[r] && "Index is too large for the dimension"); indices.push_back(ind[r]); } @@ -107,9 +107,9 @@ // only happens if we did not set the initial capacity right, and then only // for every internal vector reallocation (which with the doubling rule // should only incur an amortized linear overhead). - uint64_t *newBase = indices.data(); + const uint64_t *newBase = indices.data(); if (newBase != base) { - for (uint64_t i = 0, n = elements.size(); i < n; i++) + for (uint64_t i = 0, n = elements.size(); i < n; ++i) elements[i].indices = newBase + (elements[i].indices - base); base = newBase; } @@ -125,7 +125,7 @@ uint64_t rank = getRank(); std::sort(elements.begin(), elements.end(), [rank](const Element &e1, const Element &e2) { - for (uint64_t r = 0; r < rank; r++) { + for (uint64_t r = 0; r < rank; ++r) { if (e1.indices[r] == e2.indices[r]) continue; return e1.indices[r] < e2.indices[r]; diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -33,6 +33,13 @@ namespace mlir { namespace sparse_tensor { +// TODO: benchmark whether to keep various methods inline vs moving them +// off to the cpp file. + +// TODO: consider distinguishing separate classes for before vs +// after reading the header; so as to statically avoid the need +// to `assert(isValid())`. + /// This class abstracts over the information stored in file headers, /// as well as providing the buffers and methods for parsing those headers. class SparseTensorFile final { @@ -46,7 +53,7 @@ kUndefined = 5 }; - explicit SparseTensorFile(char *filename) : filename(filename) { + explicit SparseTensorFile(const char *filename) : filename(filename) { assert(filename && "Received nullptr for filename"); } @@ -109,7 +116,7 @@ /// Safely gets the size of the given dimension. Is only valid /// after parsing the header. 
uint64_t getDimSize(uint64_t d) const { - assert(d < getRank()); + assert(d < getRank() && "Dimension out of bounds"); return idata[2 + d]; } @@ -122,7 +129,7 @@ void readExtFROSTTHeader(); static constexpr int kColWidth = 1025; - const char *filename; + const char *const filename; FILE *file = nullptr; ValueKind valueKind_ = ValueKind::kInvalid; bool isSymmetric_ = false; @@ -184,7 +191,7 @@ /// sparse tensor in coordinate scheme. template inline SparseTensorCOO * -openSparseTensorCOO(char *filename, uint64_t rank, const uint64_t *shape, +openSparseTensorCOO(const char *filename, uint64_t rank, const uint64_t *shape, const uint64_t *perm, PrimaryType valTp) { SparseTensorFile stfile(filename); stfile.openFile(); @@ -208,11 +215,12 @@ perm, nnz); // Read all nonzero elements. std::vector indices(rank); - for (uint64_t k = 0; k < nnz; k++) { + for (uint64_t k = 0; k < nnz; ++k) { char *linePtr = stfile.readLine(); - for (uint64_t r = 0; r < rank; r++) { + for (uint64_t r = 0; r < rank; ++r) { + // Parse the 1-based index. uint64_t idx = strtoul(linePtr, &linePtr, 10); - // Add 0-based index. + // Add the 0-based index. indices[perm[r]] = idx - 1; } detail::readCOOValue(coo, indices, &linePtr, stfile.isPattern(), @@ -223,28 +231,25 @@ return coo; } -/// Writes the sparse tensor to `dest` in extended FROSTT format. +/// Writes the sparse tensor to `filename` in extended FROSTT format. template -inline void outSparseTensor(void *tensor, void *dest, bool sort) { - assert(tensor && dest); - auto coo = static_cast *>(tensor); - if (sort) - coo->sort(); - char *filename = static_cast(dest); - auto &dimSizes = coo->getDimSizes(); - auto &elements = coo->getElements(); - uint64_t rank = coo->getRank(); - uint64_t nnz = elements.size(); +inline void writeExtFROSTT(const SparseTensorCOO &coo, + const char *filename) { + assert(filename && "Got nullptr for filename"); + auto &dimSizes = coo.getDimSizes(); + auto &elements = coo.getElements(); + const uint64_t rank = coo.getRank(); + const uint64_t nnz = elements.size(); std::fstream file; file.open(filename, std::ios_base::out | std::ios_base::trunc); assert(file.is_open()); file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl; - for (uint64_t r = 0; r < rank - 1; r++) + for (uint64_t r = 0; r < rank - 1; ++r) file << dimSizes[r] << " "; file << dimSizes[rank - 1] << std::endl; - for (uint64_t i = 0; i < nnz; i++) { + for (uint64_t i = 0; i < nnz; ++i) { auto &idx = elements[i].indices; - for (uint64_t r = 0; r < rank; r++) + for (uint64_t r = 0; r < rank; ++r) file << (idx[r] + 1) << " "; file << elements[i].value << std::endl; } diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -41,11 +41,20 @@ namespace mlir { namespace sparse_tensor { +//===----------------------------------------------------------------------===// // This forward decl is sufficient to split `SparseTensorStorageBase` into // its own header, but isn't sufficient for `SparseTensorStorage` to join it. template class SparseTensorEnumeratorBase; +// These macros ensure consistent error messages, without risk of incuring +// an additional method call to do so. 
+#define ASSERT_VALID_DIM(d) \ + assert(d < getRank() && "Dimension index is out of bounds"); +#define ASSERT_COMPRESSED_DIM(d) \ + assert(isCompressedDim(d) && "Dimension is not compressed"); +#define ASSERT_DENSE_DIM(d) assert(isDenseDim(d) && "Dimension is not dense"); + /// Abstract base class for `SparseTensorStorage`. This class /// takes responsibility for all the ``-independent aspects /// of the tensor (e.g., shape, sparsity, permutation). In addition, @@ -82,7 +91,7 @@ /// Safely lookup the size of the given (storage-order) dimension. uint64_t getDimSize(uint64_t d) const { - assert(d < getRank()); + ASSERT_VALID_DIM(d); return dimSizes[d]; } @@ -95,13 +104,13 @@ /// Safely check if the (storage-order) dimension uses dense storage. bool isDenseDim(uint64_t d) const { - assert(d < getRank()); + ASSERT_VALID_DIM(d); return dimTypes[d] == DimLevelType::kDense; } /// Safely check if the (storage-order) dimension uses compressed storage. bool isCompressedDim(uint64_t d) const { - assert(d < getRank()); + ASSERT_VALID_DIM(d); switch (dimTypes[d]) { case DimLevelType::kCompressed: case DimLevelType::kCompressedNu: @@ -115,7 +124,7 @@ /// Safely check if the (storage-order) dimension uses singleton storage. bool isSingletonDim(uint64_t d) const { - assert(d < getRank()); + ASSERT_VALID_DIM(d); switch (dimTypes[d]) { case DimLevelType::kSingleton: case DimLevelType::kSingletonNu: @@ -129,7 +138,7 @@ /// Safely check if the (storage-order) dimension is ordered. bool isOrderedDim(uint64_t d) const { - assert(d < getRank()); + ASSERT_VALID_DIM(d); switch (dimTypes[d]) { case DimLevelType::kCompressedNo: case DimLevelType::kCompressedNuNo: @@ -143,7 +152,7 @@ /// Safely check if the (storage-order) dimension is unique. bool isUniqueDim(uint64_t d) const { - assert(d < getRank()); + ASSERT_VALID_DIM(d); switch (dimTypes[d]) { case DimLevelType::kCompressedNu: case DimLevelType::kCompressedNuNo: @@ -193,10 +202,11 @@ private: const std::vector dimSizes; - std::vector rev; + std::vector rev; // conceptually `const` const std::vector dimTypes; }; +//===----------------------------------------------------------------------===// // This forward decl is necessary for defining `SparseTensorStorage`, // but isn't sufficient for splitting it off. template @@ -269,11 +279,11 @@ /// Partially specialize these getter methods based on template types. void getPointers(std::vector
<P>
**out, uint64_t d) final { - assert(d < getRank()); + ASSERT_VALID_DIM(d); *out = &pointers[d]; } void getIndices(std::vector **out, uint64_t d) final { - assert(d < getRank()); + ASSERT_VALID_DIM(d); *out = &indices[d]; } void getValues(std::vector **out) final { *out = &values; } @@ -304,18 +314,18 @@ // Restore insertion path for first insert. const uint64_t lastDim = getRank() - 1; uint64_t index = added[0]; + assert(filled[index] && "added index is not filled"); cursor[lastDim] = index; lexInsert(cursor, values[index]); - assert(filled[index]); values[index] = 0; filled[index] = false; // Subsequent insertions are quick. - for (uint64_t i = 1; i < count; i++) { + for (uint64_t i = 1; i < count; ++i) { assert(index < added[i] && "non-lexicographic insertion"); index = added[i]; + assert(filled[index] && "added index is not filled"); cursor[lastDim] = index; insPath(cursor, lastDim, added[i - 1] + 1, values[index]); - assert(filled[index]); values[index] = 0; filled[index] = false; } @@ -360,7 +370,7 @@ /// does not check that `pos` is semantically valid (i.e., larger than /// the previous position and smaller than `indices[d].capacity()`). void appendPointer(uint64_t d, uint64_t pos, uint64_t count = 1) { - assert(isCompressedDim(d)); + ASSERT_COMPRESSED_DIM(d); assert(pos <= std::numeric_limits

<P>::max() && "Pointer value is too large for the P-type"); pointers[d].insert(pointers[d].end(), count, static_cast<P>

(pos)); @@ -381,7 +391,7 @@ "Index value is too large for the I-type"); indices[d].push_back(static_cast(i)); } else { // Dense dimension. - assert(isDenseDim(d)); + ASSERT_DENSE_DIM(d); assert(i >= full && "Index was already filled"); if (i == full) return; // Short-circuit, since it'll be a nop. @@ -397,7 +407,7 @@ /// does not check that `i` is semantically valid (i.e., in bounds /// for `dimSizes[d]` and not elsewhere occurring in the same segment). void writeIndex(uint64_t d, uint64_t pos, uint64_t i) { - assert(isCompressedDim(d)); + ASSERT_COMPRESSED_DIM(d); // Subscript assignment to `std::vector` requires that the `pos`-th // entry has been initialized; thus we must be sure to check `size()` // here, instead of `capacity()` as would be ideal. @@ -432,7 +442,7 @@ /// and pointwise less-than). void fromCOO(const std::vector> &elements, uint64_t lo, uint64_t hi, uint64_t d) { - uint64_t rank = getRank(); + const uint64_t rank = getRank(); assert(d <= rank && hi <= elements.size()); // Once dimensions are exhausted, insert the numerical values. if (d == rank) { @@ -444,11 +454,11 @@ uint64_t full = 0; while (lo < hi) { // If `hi` is unchanged, then `lo < elements.size()`. // Find segment in interval with same index elements in this dimension. - uint64_t i = elements[lo].indices[d]; + const uint64_t i = elements[lo].indices[d]; uint64_t seg = lo + 1; - bool merge = isUniqueDim(d); - while (merge && seg < hi && elements[seg].indices[d] == i) - seg++; + if (isUniqueDim(d)) + while (seg < hi && elements[seg].indices[d] == i) + ++seg; // Handle segment in interval for sparse or dense dimension. appendIndex(d, full, i); full = i + 1; @@ -469,7 +479,7 @@ } else if (isSingletonDim(d)) { return; } else { // Dense dimension. - assert(isDenseDim(d)); + ASSERT_DENSE_DIM(d); const uint64_t sz = getDimSizes()[d]; assert(sz >= full && "Segment is overfull"); count = detail::checkedMul(count, sz - full); @@ -486,9 +496,9 @@ /// Wraps up a single insertion path, inner to outer. void endPath(uint64_t diff) { - uint64_t rank = getRank(); - assert(diff <= rank); - for (uint64_t i = 0; i < rank - diff; i++) { + const uint64_t rank = getRank(); + assert(diff <= rank && "Dimension-diff is out of bounds"); + for (uint64_t i = 0; i < rank - diff; ++i) { const uint64_t d = rank - i - 1; finalizeSegment(d, idx[d] + 1); } @@ -496,10 +506,10 @@ /// Continues a single insertion path, outer to inner. void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) { - uint64_t rank = getRank(); - assert(diff < rank); - for (uint64_t d = diff; d < rank; d++) { - uint64_t i = cursor[d]; + ASSERT_VALID_DIM(diff); + const uint64_t rank = getRank(); + for (uint64_t d = diff; d < rank; ++d) { + const uint64_t i = cursor[d]; appendIndex(d, top, i); top = 0; idx[d] = i; @@ -509,7 +519,8 @@ /// Finds the lexicographic differing dimension. uint64_t lexDiff(const uint64_t *cursor) const { - for (uint64_t r = 0, rank = getRank(); r < rank; r++) + const uint64_t rank = getRank(); + for (uint64_t r = 0; r < rank; ++r) if (cursor[r] > idx[r]) return r; else @@ -529,6 +540,10 @@ std::vector idx; // index cursor for lexicographic insertion. }; +#undef ASSERT_COMPRESSED_DIM +#undef ASSERT_VALID_DIM + +//===----------------------------------------------------------------------===// /// A (higher-order) function object for enumerating the elements of some /// `SparseTensorStorage` under a permutation. 
That is, the `forallElements` /// method encapsulates the loop-nest for enumerating the elements of @@ -566,7 +581,7 @@ assert(rank == getRank() && "Permutation rank mismatch"); const auto &rev = src.getRev(); // source-order -> semantic-order const auto &dimSizes = src.getDimSizes(); // in source storage-order - for (uint64_t s = 0; s < rank; s++) { // `s` source storage-order + for (uint64_t s = 0; s < rank; ++s) { // `s` source storage-order uint64_t t = perm[rev[s]]; // `t` target-order reord[s] = t; permsz[t] = dimSizes[s]; @@ -604,17 +619,19 @@ std::vector cursor; // in target order. }; +//===----------------------------------------------------------------------===// template class SparseTensorEnumerator final : public SparseTensorEnumeratorBase { using Base = SparseTensorEnumeratorBase; + using StorageImpl = SparseTensorStorage; public: /// Constructs an enumerator with the given permutation for mapping /// the semantic-ordering of dimensions to the desired target-ordering. /// /// Precondition: `perm` must be valid for `rank`. - SparseTensorEnumerator(const SparseTensorStorage &tensor, - uint64_t rank, const uint64_t *perm) + SparseTensorEnumerator(const StorageImpl &tensor, uint64_t rank, + const uint64_t *perm) : Base(tensor, rank, perm) {} ~SparseTensorEnumerator() final = default; @@ -628,8 +645,7 @@ void forallElements(ElementConsumer yield, uint64_t parentPos, uint64_t d) { // Recover the `` type parameters of `src`. - const auto &src = - static_cast &>(this->src); + const auto &src = static_cast(this->src); if (d == Base::getRank()) { assert(parentPos < src.values.size() && "Value position is out of bounds"); @@ -647,18 +663,18 @@ const std::vector &indicesD = src.indices[d]; assert(pstop <= indicesD.size() && "Index position is out of bounds"); uint64_t &cursorReordD = this->cursor[this->reord[d]]; - for (uint64_t pos = pstart; pos < pstop; pos++) { + for (uint64_t pos = pstart; pos < pstop; ++pos) { cursorReordD = static_cast(indicesD[pos]); forallElements(yield, pos, d + 1); } } else if (src.isSingletonDim(d)) { MLIR_SPARSETENSOR_FATAL("unsupported dimension level type"); } else { // Dense dimension. - assert(src.isDenseDim(d)); + assert(src.isDenseDim(d)); // TODO: reuse the ASSERT_DENSE_DIM message const uint64_t sz = src.getDimSizes()[d]; const uint64_t pstart = parentPos * sz; uint64_t &cursorReordD = this->cursor[this->reord[d]]; - for (uint64_t i = 0; i < sz; i++) { + for (uint64_t i = 0; i < sz; ++i) { cursorReordD = i; forallElements(yield, pstart + i, d + 1); } @@ -666,6 +682,7 @@ } }; +//===----------------------------------------------------------------------===// /// Statistics regarding the number of nonzero subtensors in /// a source tensor, for direct sparse=>sparse conversion a la /// . @@ -733,49 +750,35 @@ // Definitions of the ctors and factories of `SparseTensorStorage`. namespace detail { - -// TODO: try to unify this with `SparseTensorFile::assertMatchesShape` -// which is used by `openSparseTensorCOO`. It's easy enough to resolve -// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier -// to resolve the presence/absence of `perm` (without introducing extra -// overhead), so perhaps the code duplication is unavoidable. -// /// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping /// semantic-order to target-order) are a refinement of the desired `shape` /// (in semantic-order). /// /// Precondition: `perm` and `shape` must be valid for `rank`. 
-inline void assertPermutedSizesMatchShape(const std::vector &dimSizes, - uint64_t rank, const uint64_t *perm, - const uint64_t *shape) { - assert(perm && shape); - assert(rank == dimSizes.size() && "Rank mismatch"); - for (uint64_t r = 0; r < rank; r++) - assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) && - "Dimension size mismatch"); -} - +void assertPermutedSizesMatchShape(const std::vector &dimSizes, + uint64_t rank, const uint64_t *perm, + const uint64_t *shape); } // namespace detail template SparseTensorStorage *SparseTensorStorage::newSparseTensor( uint64_t rank, const uint64_t *shape, const uint64_t *perm, const DimLevelType *sparsity, SparseTensorCOO *coo) { - SparseTensorStorage *n = nullptr; if (coo) { const auto &coosz = coo->getDimSizes(); +#ifndef NDEBUG detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape); - n = new SparseTensorStorage(coosz, perm, sparsity, coo); - } else { - std::vector permsz(rank); - for (uint64_t r = 0; r < rank; r++) { - assert(shape[r] > 0 && "Dimension size zero has trivial storage"); - permsz[perm[r]] = shape[r]; - } - // We pass the null `coo` to ensure we select the intended constructor. - n = new SparseTensorStorage(permsz, perm, sparsity, coo); +#endif + return new SparseTensorStorage(coosz, perm, sparsity, coo); + } + // else + std::vector permsz(rank); + for (uint64_t r = 0; r < rank; ++r) { + assert(shape[r] > 0 && "Dimension size zero has trivial storage"); + permsz[perm[r]] = shape[r]; } - return n; + // We pass the null `coo` to ensure we select the intended constructor. + return new SparseTensorStorage(permsz, perm, sparsity, coo); } template @@ -786,7 +789,9 @@ SparseTensorEnumeratorBase *enumerator; source->newEnumerator(&enumerator, rank, perm); const auto &permsz = enumerator->permutedSizes(); +#ifndef NDEBUG detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape); +#endif auto *tensor = new SparseTensorStorage(permsz, perm, sparsity, *source); delete enumerator; @@ -805,7 +810,7 @@ // we should really use nnz and dense/sparse distribution. bool allDense = true; uint64_t sz = 1; - for (uint64_t r = 0, rank = getRank(); r < rank; r++) { + for (uint64_t r = 0, rank = getRank(); r < rank; ++r) { if (isCompressedDim(r)) { // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply // `sz` by that before reserving. (For now we just use 1.) @@ -819,7 +824,7 @@ sz = 1; allDense = false; } else { // Dense dimension. - assert(isDenseDim(r)); + ASSERT_DENSE_DIM(r); sz = detail::checkedMul(sz, getDimSizes()[r]); } } @@ -851,7 +856,7 @@ nnz.initialize(*enumerator); // Initialize "pointers" overhead (and allocate "indices", "values"). uint64_t parentSz = 1; // assembled-size (not dimension-size) of `r-1`. - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { if (isCompressedDim(r)) { pointers[r].reserve(parentSz + 1); pointers[r].push_back(0); @@ -882,7 +887,7 @@ // The yieldPos loop enumerator->forallElements([this](const std::vector &ind, V val) { uint64_t parentSz = 1, parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { if (isCompressedDim(r)) { // If `parentPos == parentSz` then it's valid as an array-lookup; // however, it's semantically invalid here since that entry @@ -900,7 +905,7 @@ } else if (isSingletonDim(r)) { // the new parentPos equals the old parentPos. } else { // Dense dimension. 
- assert(isDenseDim(r)); + ASSERT_DENSE_DIM(r); parentPos = parentPos * getDimSizes()[r] + ind[r]; } parentSz = assembledSize(parentSz, r); @@ -911,7 +916,7 @@ // No longer need the enumerator, so we'll delete it ASAP. delete enumerator; // The finalizeYieldPos loop - for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; r++) { + for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; ++r) { if (isCompressedDim(r)) { assert(parentSz == pointers[r].size() - 1 && "Actual pointers size doesn't match the expected size"); @@ -919,7 +924,7 @@ assert(pointers[r][parentSz - 1] == pointers[r][parentSz] && "Pointers got corrupted"); // TODO: optimize this by using `memmove` or similar. - for (uint64_t n = 0; n < parentSz; n++) { + for (uint64_t n = 0; n < parentSz; ++n) { const uint64_t parentPos = parentSz - n; pointers[r][parentPos] = pointers[r][parentPos - 1]; } @@ -932,4 +937,6 @@ } // namespace sparse_tensor } // namespace mlir +#undef ASSERT_DENSE_DIM + #endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H diff --git a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp @@ -25,8 +25,6 @@ #include "mlir/ExecutionEngine/SparseTensor/File.h" -#ifdef MLIR_SPARSETENSOR_DEFINE_FUNCTIONS // We are building this library - #include #include @@ -80,7 +78,7 @@ void SparseTensorFile::assertMatchesShape(uint64_t rank, const uint64_t *shape) const { assert(rank == getRank() && "Rank mismatch"); - for (uint64_t r = 0; r < rank; r++) + for (uint64_t r = 0; r < rank; ++r) assert((shape[r] == 0 || shape[r] == idata[2 + r]) && "Dimension size mismatch"); } @@ -152,12 +150,10 @@ if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename); // Followed by a line with the dimension sizes (one per rank). - for (uint64_t r = 0; r < idata[0]; r++) + for (uint64_t r = 0; r < idata[0]; ++r) if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename); readLine(); // end of line // The FROSTT format does not define the data type of the nonzero elements. 
valueKind_ = ValueKind::kUndefined; } - -#endif // MLIR_SPARSETENSOR_DEFINE_FUNCTIONS diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp @@ -18,8 +18,6 @@ #include "mlir/ExecutionEngine/SparseTensor/Storage.h" -#ifdef MLIR_SPARSETENSOR_DEFINE_FUNCTIONS // We are building this library - using namespace mlir::sparse_tensor; SparseTensorNNZ::SparseTensorNNZ(const std::vector &dimSizes, @@ -55,7 +53,7 @@ void SparseTensorNNZ::forallIndices(uint64_t stopDim, SparseTensorNNZ::NNZConsumer yield) const { - assert(stopDim < getRank() && "Stopping-dimension is out of bounds"); + assert(stopDim < getRank() && "Dimension out of bounds"); assert(dimTypes[stopDim] == DimLevelType::kCompressed && "Cannot look up non-compressed dimensions"); forallIndices(yield, stopDim, 0, 0); @@ -63,7 +61,7 @@ void SparseTensorNNZ::add(const std::vector &ind) { uint64_t parentPos = 0; - for (uint64_t rank = getRank(), r = 0; r < rank; r++) { + for (uint64_t rank = getRank(), r = 0; r < rank; ++r) { if (dimTypes[r] == DimLevelType::kCompressed) nnz[r][parentPos]++; parentPos = parentPos * dimSizes[r] + ind[r]; @@ -84,5 +82,3 @@ forallIndices(yield, stopDim, pstart + i, d + 1); } } - -#endif // MLIR_SPARSETENSOR_DEFINE_FUNCTIONS diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp --- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp @@ -12,6 +12,9 @@ // no benefit). Though this also helps ensure that we avoid weak-vtables: // // +// (This file also contains the definition of `assertPermutedSizesMatchShape` +// which is used by `SparseTensorStorage` factories.) +// // This file is part of the lightweight runtime support library for sparse // tensor manipulations. The functionality of the support library is meant // to simplify benchmarking, testing, and debugging MLIR code operating on @@ -22,8 +25,6 @@ #include "mlir/ExecutionEngine/SparseTensor/Storage.h" -#ifdef MLIR_SPARSETENSOR_DEFINE_FUNCTIONS // We are building this library - using namespace mlir::sparse_tensor; SparseTensorStorageBase::SparseTensorStorageBase( @@ -31,17 +32,19 @@ const DimLevelType *sparsity) : dimSizes(dimSizes), rev(getRank()), dimTypes(sparsity, sparsity + getRank()) { - assert(perm && sparsity); + assert(perm && "Got nullptr for permutation"); + assert(sparsity && "Got nullptr for sparsity"); const uint64_t rank = getRank(); // Validate parameters. assert(rank > 0 && "Trivial shape is unsupported"); - for (uint64_t r = 0; r < rank; r++) { + for (uint64_t r = 0; r < rank; ++r) { assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); assert((isDenseDim(r) || isCompressedDim(r) || isSingletonDim(r)) && "Unsupported DimLevelType"); } // Construct the "reverse" (i.e., inverse) permutation. - for (uint64_t r = 0; r < rank; r++) + // TODO: should move this computation off to the codegen + for (uint64_t r = 0; r < rank; ++r) rev[perm[r]] = r; } @@ -97,4 +100,18 @@ #undef FATAL_PIV -#endif // MLIR_SPARSETENSOR_DEFINE_FUNCTIONS +// TODO: try to unify this with `SparseTensorFile::assertMatchesShape` +// (which is used by `openSparseTensorCOO`). 
It's easy enough to resolve +// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier +// to resolve the presence/absence of `perm` (without introducing extra +// overhead), so perhaps the code duplication is unavoidable? +void mlir::sparse_tensor::detail::assertPermutedSizesMatchShape( + const std::vector &dimSizes, uint64_t rank, const uint64_t *perm, + const uint64_t *shape) { + assert(perm && "Got nullptr for permutation"); + assert(shape && "Got nullptr for shape"); + assert(rank == dimSizes.size() && "Rank mismatch"); + for (uint64_t d = 0; d < rank; ++d) + assert((shape[d] == 0 || shape[d] == dimSizes[perm[d]]) && + "Dimension size mismatch"); +} diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -45,23 +45,14 @@ //===----------------------------------------------------------------------===// #include "mlir/ExecutionEngine/SparseTensorUtils.h" + +#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS + #include "mlir/ExecutionEngine/SparseTensor/COO.h" #include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h" #include "mlir/ExecutionEngine/SparseTensor/File.h" #include "mlir/ExecutionEngine/SparseTensor/Storage.h" -#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include using namespace mlir::sparse_tensor; @@ -71,9 +62,9 @@ /// Initializes sparse tensor from an external COO-flavored format. template static SparseTensorStorage * -toMLIRSparseTensor(uint64_t rank, uint64_t nse, uint64_t *shape, V *values, - uint64_t *indices, uint64_t *perm, uint8_t *sparse) { - const DimLevelType *sparsity = (DimLevelType *)(sparse); +toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *shape, + const V *values, const uint64_t *indices, + const uint64_t *perm, const DimLevelType *sparsity) { #ifndef NDEBUG // Verify that perm is a permutation of 0..(rank-1). std::vector order(perm, perm + rank); @@ -108,16 +99,15 @@ /// Converts a sparse tensor to an external COO-flavored format. template -static void fromMLIRSparseTensor(void *tensor, uint64_t *pRank, uint64_t *pNse, - uint64_t **pShape, V **pValues, - uint64_t **pIndices) { - assert(tensor); - auto sparseTensor = - static_cast *>(tensor); - uint64_t rank = sparseTensor->getRank(); +static void +fromMLIRSparseTensor(const SparseTensorStorage *tensor, + uint64_t *pRank, uint64_t *pNse, uint64_t **pShape, + V **pValues, uint64_t **pIndices) { + assert(tensor && "Received nullptr for tensor"); + uint64_t rank = tensor->getRank(); std::vector perm(rank); std::iota(perm.begin(), perm.end(), 0); - SparseTensorCOO *coo = sparseTensor->toCOO(perm.data()); + SparseTensorCOO *coo = tensor->toCOO(perm.data()); const std::vector> &elements = coo->getElements(); uint64_t nse = elements.size(); @@ -458,7 +448,11 @@ #define IMPL_OUTSPARSETENSOR(VNAME, V) \ void outSparseTensor##VNAME(void *coo, void *dest, bool sort) { \ - return outSparseTensor(coo, dest, sort); \ + assert(coo && "Got nullptr for COO object"); \ + auto &coo_ = *static_cast *>(coo); \ + if (sort) \ + coo_.sort(); \ + return writeExtFROSTT(coo_, static_cast(dest)); \ } FOREVERY_V(IMPL_OUTSPARSETENSOR) #undef IMPL_OUTSPARSETENSOR @@ -495,13 +489,14 @@ out->assign(dimSizes, dimSizes + rank); } +// We can't use `static_cast` here because `DimLevelType` is an enum-class. // TODO: generalize beyond 64-bit indices. 
#define IMPL_CONVERTTOMLIRSPARSETENSOR(VNAME, V) \ void *convertToMLIRSparseTensor##VNAME( \ uint64_t rank, uint64_t nse, uint64_t *shape, V *values, \ uint64_t *indices, uint64_t *perm, uint8_t *sparse) { \ return toMLIRSparseTensor(rank, nse, shape, values, indices, perm, \ - sparse); \ + reinterpret_cast<const DimLevelType *>(sparse)); \ } FOREVERY_V(IMPL_CONVERTTOMLIRSPARSETENSOR) #undef IMPL_CONVERTTOMLIRSPARSETENSOR @@ -516,7 +511,9 @@ void convertFromMLIRSparseTensor##VNAME(void *tensor, uint64_t *pRank, \ uint64_t *pNse, uint64_t **pShape, \ V **pValues, uint64_t **pIndices) { \ - fromMLIRSparseTensor(tensor, pRank, pNse, pShape, pValues, pIndices); \ + fromMLIRSparseTensor( \ static_cast<SparseTensorStorage<uint64_t, uint64_t, V> *>(tensor), \ pRank, pNse, pShape, pValues, pIndices); \ } FOREVERY_V(IMPL_CONVERTFROMMLIRSPARSETENSOR) #undef IMPL_CONVERTFROMMLIRSPARSETENSOR
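
A minimal usage sketch (not part of this patch) of the refactored output path: it builds a small COO tensor, sorts it, and writes it with the new writeExtFROSTT helper, which is what the outSparseTensor##VNAME wrappers above now do after unpacking their void * arguments. The file name and element values are made up, and the newSparseTensorCOO factory name is assumed from COO.h (only its parameter list is visible in the hunk above).

  // Hypothetical driver; assumes the headers from this patch are on the include path.
  #include "mlir/ExecutionEngine/SparseTensor/COO.h"
  #include "mlir/ExecutionEngine/SparseTensor/File.h"

  using namespace mlir::sparse_tensor;

  int main() {
    // A 3x4 matrix stored in identity dimension order, with capacity for 2 elements.
    const uint64_t dimSizes[] = {3, 4};
    const uint64_t perm[] = {0, 1};
    SparseTensorCOO<double> *coo =
        SparseTensorCOO<double>::newSparseTensorCOO(2, dimSizes, perm, 2);
    coo->add({0, 1}, 1.5);
    coo->add({2, 3}, 2.5);
    // The C API sorts before writing when `sort` is true; do the same here.
    coo->sort();
    // Emits 1-based indices in extended FROSTT format.
    writeExtFROSTT(*coo, "tensor.tns");
    delete coo;
    return 0;
  }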