diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
@@ -26,14 +26,21 @@
 namespace mlir {
 namespace sparse_tensor {

-/// A sparse tensor element in coordinate scheme (value and indices).
-/// For example, a rank-1 vector element would look like
+/// An element of a sparse tensor in coordinate-scheme representation
+/// (i.e., a pair of indices and value). For example, a rank-1 vector
+/// element would look like
 ///   ({i}, a[i])
-/// and a rank-5 tensor element like
+/// and a rank-5 tensor element would look like
 ///   ({i,j,k,l,m}, a[i,j,k,l,m])
-/// We use pointer to a shared index pool rather than e.g. a direct
-/// vector since that (1) reduces the per-element memory footprint, and
-/// (2) centralizes the memory reservation and (re)allocation to one place.
+///
+/// The indices are represented as a (non-owning) pointer into a shared
+/// pool of indices, rather than being stored directly in this object.
+/// This significantly improves performance because it (1) reduces the
+/// per-element memory footprint, and (2) centralizes the memory
+/// management for indices. The only downside is that the indices
+/// themselves cannot be retrieved without knowing the rank of the
+/// tensor to which this element belongs (and that rank is not stored
+/// in this object).
 template <typename V>
 struct Element final {
   Element(const uint64_t *ind, V val) : indices(ind), value(val){};
@@ -48,11 +55,11 @@
 using ElementConsumer =
     const std::function<void(const std::vector<uint64_t> &, V)> &;

-/// A memory-resident sparse tensor in coordinate scheme (collection of
-/// elements). This data structure is used to read a sparse tensor from
-/// any external format into memory and sort the elements lexicographically
-/// by indices before passing it back to the client (most packed storage
-/// formats require the elements to appear in lexicographic index order).
+/// A memory-resident sparse tensor in coordinate-scheme representation
+/// (a collection of `Element`s). This data structure is used as
+/// an intermediate representation; e.g., for reading sparse tensors
+/// from external formats into memory, or for certain conversions between
+/// different `SparseTensorStorage` formats.
 template <typename V>
 class SparseTensorCOO final {
 public:
@@ -70,6 +77,8 @@
   /// fully permuted coordinate scheme.
   ///
   /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
+  ///
+  /// Asserts: the elements of `dimSizes` are non-zero.
   static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
                                                 const uint64_t *dimSizes,
                                                 const uint64_t *perm,
@@ -85,13 +94,21 @@
   /// Get the rank of the tensor.
   uint64_t getRank() const { return dimSizes.size(); }

-  /// Getter for the dimension-sizes array.
+  /// Get the dimension-sizes array.
   const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

-  /// Getter for the elements array.
+  /// Get the elements array.
   const std::vector<Element<V>> &getElements() const { return elements; }

-  /// Adds element as indices and value.
+  /// Adds an element to the tensor. This method does not check whether
+  /// `ind` is already associated with a value; it adds the element
+  /// regardless. Resolving such conflicts is left up to clients of the
+  /// iterator interface.
+  ///
+  /// Asserts:
+  /// * is not in iterator mode
+  /// * `ind` is valid for `rank`
+  /// * the elements of `ind` are valid for `dimSizes`.
   void add(const std::vector<uint64_t> &ind, V val) {
     assert(!iteratorLocked && "Attempt to add() after startIterator()");
     const uint64_t *base = indices.data();
@@ -117,7 +134,10 @@
     elements.emplace_back(base + size, val);
   }

-  /// Sorts elements lexicographically by index.
+  /// Sorts elements lexicographically by index. If an index is mapped to
+  /// multiple values, then the relative order of those values is unspecified.
+  ///
+  /// Asserts: is not in iterator mode.
   void sort() {
     assert(!iteratorLocked && "Attempt to sort() after startIterator()");
     // TODO: we may want to cache an `isSorted` bit, to avoid
@@ -134,13 +154,17 @@
     });
   }

-  /// Switch into iterator mode.
+  /// Switch into iterator mode. If already in iterator mode, then
+  /// resets the position to the first element.
   void startIterator() {
     iteratorLocked = true;
     iteratorPos = 0;
   }

-  /// Get the next element.
+  /// Get the next element. If there are no remaining elements, then
+  /// returns nullptr and switches out of iterator mode.
+  ///
+  /// Asserts: is in iterator mode.
   const Element<V> *getNext() {
     assert(iteratorLocked && "Attempt to getNext() before startIterator()");
     if (iteratorPos < elements.size())
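(Not part of the patch.) A minimal sketch of how a client might exercise the
`SparseTensorCOO` behavior documented above, assuming a rank-2 tensor whose
dimension sizes admit the index {0,3}; the helper name `demoCOO` is made up
for illustration:

    #include <cstdio>
    #include "mlir/ExecutionEngine/SparseTensor/COO.h"

    // Adds two values for the same index, sorts, then drains the iterator.
    static void demoCOO(mlir::sparse_tensor::SparseTensorCOO<double> *coo) {
      // add() never checks for duplicates; the second call simply appends
      // another element for the same index.
      coo->add({0, 3}, 1.0);
      coo->add({0, 3}, 2.0);
      // sort() orders elements lexicographically by index; the relative
      // order of the two values at {0,3} is unspecified.
      coo->sort();
      coo->startIterator();
      while (const mlir::sparse_tensor::Element<double> *elt = coo->getNext()) {
        // `indices` points into the shared pool; the rank (2 here) must be
        // known by the caller to read the right number of entries.
        std::printf("({%llu,%llu}, %g)\n",
                    static_cast<unsigned long long>(elt->indices[0]),
                    static_cast<unsigned long long>(elt->indices[1]),
                    elt->value);
      }
      // getNext() has returned nullptr, so iterator mode has been exited.
    }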
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/CheckedMul.h
@@ -29,7 +29,12 @@
 namespace sparse_tensor {
 namespace detail {

-/// A version of `operator*` on `uint64_t` which checks for overflows.
+// TODO: would be better to use various architectures' intrinsics to
+// detect the overflow directly, instead of doing the assertion beforehand
+// (which requires an expensive division).
+//
+/// A version of `operator*` on `uint64_t` which guards against overflows
+/// (when assertions are enabled).
 inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) {
   assert((lhs == 0 || rhs <= std::numeric_limits<uint64_t>::max() / lhs) &&
          "Integer overflow");
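(Not part of the patch.) A sketch of the kind of call site `checkedMul` is
meant for: folding dimension sizes into an element count so that an
overflowing product trips the assertion in debug builds rather than silently
wrapping; the helper name `totalSize` is made up for illustration:

    #include <cstdint>
    #include <vector>
    #include "mlir/ExecutionEngine/SparseTensor/CheckedMul.h"

    // Multiplies all dimension sizes together, one guarded step at a time.
    static uint64_t totalSize(const std::vector<uint64_t> &dimSizes) {
      uint64_t total = 1;
      for (uint64_t sz : dimSizes)
        total = mlir::sparse_tensor::detail::checkedMul(total, sz);
      return total;
    }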
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/ErrorHandling.h
@@ -28,11 +28,12 @@
 #include <cstdio>
 #include <cstdlib>

-// This macro helps minimize repetition of this idiom, as well as ensuring
-// we have some additional output indicating where the error is coming from.
-// (Since `fprintf` doesn't provide a stacktrace, this helps make it easier
-// to track down whether an error is coming from our code vs somewhere else
-// in MLIR.)
+/// This macro helps minimize repetition of the printf-and-exit idiom,
+/// as well as ensuring that we print some additional output indicating
+/// where the error is coming from, to make it easier to determine
+/// whether a particular error is coming from the runtime library's code
+/// or from somewhere else in the MLIR stack (since that can be hard to
+/// determine without the stack traces provided by assertion failures).
 #define MLIR_SPARSETENSOR_FATAL(...)                                          \
   do {                                                                        \
     fprintf(stderr, "SparseTensorUtils: " __VA_ARGS__);                       \
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -45,11 +45,14 @@
 class SparseTensorFile final {
 public:
   enum class ValueKind {
+    // The value before calling `readHeader`.
     kInvalid = 0,
+    // Values that can be set by `readMMEHeader`.
     kPattern = 1,
     kReal = 2,
     kInteger = 3,
     kComplex = 4,
+    // The value set by `readExtFROSTTHeader`.
     kUndefined = 5
   };

@@ -80,6 +83,7 @@

   ValueKind getValueKind() const { return valueKind_; }

+  /// Checks if a header has been successfully read.
   bool isValid() const { return valueKind_ != ValueKind::kInvalid; }

   /// Gets the MME "pattern" property setting. Is only valid after
@@ -125,7 +129,13 @@
   void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;

 private:
+  /// Read the MME header of a general sparse matrix of type real.
   void readMMEHeader();
+
+  /// Read the "extended" FROSTT header. Although not part of the
+  /// documented format, we assume that the file starts with optional
+  /// comments followed by two lines that define the rank, the number of
+  /// nonzeros, and the dimension sizes (one per rank) of the sparse tensor.
   void readExtFROSTTHeader();

   static constexpr int kColWidth = 1025;
@@ -137,6 +147,7 @@
   char line[kColWidth];
 };

+//===----------------------------------------------------------------------===//
 namespace detail {

 // Adds a value to a tensor in coordinate scheme. If is_symmetric_value is true,
@@ -153,8 +164,8 @@
     coo->add({indices[1], indices[0]}, value);
 }

-// Reads an element of a complex type for the current indices in coordinate
-// scheme.
+/// Reads an element of a complex type for the current indices in
+/// coordinate scheme.
 template <typename V>
 inline void readCOOValue(SparseTensorCOO<std::complex<V>> *coo,
                          const std::vector<uint64_t> indices, char **linePtr,
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -87,7 +87,7 @@
   /// Get the rank of the tensor.
   uint64_t getRank() const { return dimSizes.size(); }

-  /// Getter for the dimension-sizes array, in storage-order.
+  /// Get the dimension-sizes array, in storage-order.
   const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

   /// Safely lookup the size of the given (storage-order) dimension.
@@ -96,11 +96,11 @@
     return dimSizes[d];
   }

-  /// Getter for the "reverse" permutation, which maps this object's
+  /// Get the "reverse" permutation, which maps this object's
   /// storage-order to the tensor's semantic-order.
   const std::vector<uint64_t> &getRev() const { return rev; }

-  /// Getter for the dimension-types array, in storage-order.
+  /// Get the dimension-types array, in storage-order.
   const std::vector<DimLevelType> &getDimTypes() const { return dimTypes; }

   /// Safely check if the (storage-order) dimension uses dense storage.
@@ -172,11 +172,13 @@
   FOREVERY_V(DECL_NEWENUMERATOR)
 #undef DECL_NEWENUMERATOR

-  /// Overhead storage.
+  /// Pointers-overhead storage.
 #define DECL_GETPOINTERS(PNAME, P)                                            \
   virtual void getPointers(std::vector<P> **, uint64_t);
   FOREVERY_FIXED_O(DECL_GETPOINTERS)
 #undef DECL_GETPOINTERS
+
+  /// Indices-overhead storage.
 #define DECL_GETINDICES(INAME, I)                                             \
   virtual void getIndices(std::vector<I> **, uint64_t);
   FOREVERY_FIXED_O(DECL_GETINDICES)
@@ -214,11 +216,12 @@
 class SparseTensorEnumerator;

 /// A memory-resident sparse tensor using a storage scheme based on
-/// per-dimension sparse/dense annotations. This data structure provides a
-/// bufferized form of a sparse tensor type. In contrast to generating setup
-/// methods for each differently annotated sparse tensor, this method provides
-/// a convenient "one-size-fits-all" solution that simply takes an input tensor
-/// and annotations to implement all required setup in a general manner.
+/// per-dimension sparse/dense annotations. This data structure provides
+/// a bufferized form of a sparse tensor type. In contrast to generating
+/// setup methods for each differently annotated sparse tensor, this
+/// method provides a convenient "one-size-fits-all" solution that simply
+/// takes an input tensor and annotations to implement all required setup
+/// in a general manner.
 template <typename P, typename I, typename V>
 class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Private constructor to share code between the other constructors.
@@ -340,6 +343,9 @@
     endPath(0);
   }

+  /// Allocate a new enumerator for this class's `<P,I,V>` types and
+  /// erase the `<P,I>` parts from the type. Callers must make sure to
+  /// delete the enumerator when they're done with it.
   void newEnumerator(SparseTensorEnumeratorBase<V> **out, uint64_t rank,
                      const uint64_t *perm) const final {
     *out = new SparseTensorEnumerator<P, I, V>(*this, rank, perm);
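(Not part of the patch.) The caller-owns contract documented for
`newEnumerator` above, spelled out as a sketch; it assumes the storage object
was built for `double` values, and the helper name `enumerateValues` is made
up for illustration:

    #include <cstdint>
    #include "mlir/ExecutionEngine/SparseTensor/Storage.h"

    // Allocates an enumerator through the virtual hook, uses it, and
    // deletes it before returning; nothing here outlives the call.
    static void enumerateValues(
        const mlir::sparse_tensor::SparseTensorStorageBase &tensor,
        uint64_t rank, const uint64_t *perm) {
      mlir::sparse_tensor::SparseTensorEnumeratorBase<double> *enumerator =
          nullptr;
      tensor.newEnumerator(&enumerator, rank, perm);
      // ... drive the enumeration here ...
      delete enumerator; // callers must delete the enumerator when done
    }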
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensor/File.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/File.cpp
@@ -83,7 +83,7 @@
          "Dimension size mismatch");
 }

-/// Helper to convert string to lower case.
+/// Helper to convert C-style strings (i.e., '\0' terminated) to lower case.
 static inline char *toLower(char *token) {
   for (char *c = token; *c; ++c)
     *c = tolower(*c);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/NNZ.cpp
@@ -20,6 +20,13 @@

 using namespace mlir::sparse_tensor;

+//===----------------------------------------------------------------------===//
+/// Allocate the statistics structure for the desired sizes and
+/// sparsity (in the target tensor's storage-order). This constructor
+/// does not actually populate the statistics, however; for that see
+/// `initialize`.
+///
+/// Precondition: `dimSizes` must not contain zeros.
 SparseTensorNNZ::SparseTensorNNZ(const std::vector<uint64_t> &dimSizes,
                                  const std::vector<DimLevelType> &sparsity)
     : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) {
@@ -51,6 +58,10 @@
   }
 }

+/// Lexicographically enumerates all indices for dimensions strictly
+/// less than `stopDim`, and passes their nnz statistic to the callback.
+/// Since our use-case only requires the statistic, not the coordinates
+/// themselves, we do not bother to construct those coordinates.
 void SparseTensorNNZ::forallIndices(uint64_t stopDim,
                                     SparseTensorNNZ::NNZConsumer yield) const {
   assert(stopDim < getRank() && "Dimension out of bounds");
@@ -59,6 +70,11 @@
   forallIndices(yield, stopDim, 0, 0);
 }

+/// Adds a new element (i.e., increments its statistics). We use
+/// a method rather than inlining into the lambda in `initialize`,
+/// to avoid spurious templating over `V`. This method is private
+/// to avoid needing to re-assert the validity of `ind` (which is
+/// guaranteed by `forallElements`).
 void SparseTensorNNZ::add(const std::vector<uint64_t> &ind) {
   uint64_t parentPos = 0;
   for (uint64_t rank = getRank(), r = 0; r < rank; ++r) {
@@ -68,6 +84,7 @@
   }
 }

+/// Recursive component of the public `forallIndices`.
 void SparseTensorNNZ::forallIndices(SparseTensorNNZ::NNZConsumer yield,
                                     uint64_t stopDim, uint64_t parentPos,
                                     uint64_t d) const {
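(Not part of the patch.) A sketch of a caller of the public `forallIndices`
documented above; it assumes `NNZConsumer` accepts a callable taking the
per-index nnz count as a single `uint64_t` (its definition is not shown in
this patch), and the helper name `totalEntries` is made up for illustration:

    // Adds up the nnz statistics yielded for all indices of the dimensions
    // strictly less than `stopDim`.
    static uint64_t
    totalEntries(const mlir::sparse_tensor::SparseTensorNNZ &stats,
                 uint64_t stopDim) {
      uint64_t total = 0;
      stats.forallIndices(stopDim, [&total](uint64_t n) { total += n; });
      return total;
    }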
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a light-weight runtime support library that is useful
-// for sparse tensor manipulations. The functionality provided in this library
-// is meant to simplify benchmarking, testing, and debugging MLIR code that
-// operates on sparse tensors. The provided functionality is **not** part
-// of core MLIR, however.
+// This file implements a light-weight runtime support library for
+// manipulating sparse tensors from MLIR. More specifically, it provides
+// C-API wrappers so that MLIR-generated code can call into the C++ runtime
+// support library. The functionality provided in this library is meant
+// to simplify benchmarking, testing, and debugging of MLIR code operating
+// on sparse tensors. However, the provided functionality is **not**
+// part of core MLIR itself.
 //
 // The following memory-resident sparse storage schemes are supported:
 //
@@ -57,9 +59,18 @@

 using namespace mlir::sparse_tensor;

+//===----------------------------------------------------------------------===//
+//
+// Implementation details for public functions, which don't have a good
+// place to live in the C++ library this file is wrapping.
+//
+//===----------------------------------------------------------------------===//
+
 namespace {

 /// Initializes sparse tensor from an external COO-flavored format.
+/// Used by `IMPL_CONVERTTOMLIRSPARSETENSOR`.
+// TODO: generalize beyond 64-bit indices.
 template <typename V>
 static SparseTensorStorage<uint64_t, uint64_t, V> *
 toMLIRSparseTensor(uint64_t rank, uint64_t nse, const uint64_t *shape,
@@ -98,6 +109,14 @@
 }

 /// Converts a sparse tensor to an external COO-flavored format.
+/// Used by `IMPL_CONVERTFROMMLIRSPARSETENSOR`.
+//
+// TODO: Currently, values are copied from SparseTensorStorage to
+// SparseTensorCOO, then to the output. We may want to reduce the number
+// of copies.
+//
+// TODO: generalize beyond 64-bit indices, no dim ordering, all dimensions
+// compressed
 template <typename V>
 static void
 fromMLIRSparseTensor(const SparseTensorStorage<uint64_t, uint64_t, V> *tensor,
@@ -490,7 +509,6 @@
 }

 // We can't use `static_cast` here because `DimLevelType` is an enum-class.
-// TODO: generalize beyond 64-bit indices.
 #define IMPL_CONVERTTOMLIRSPARSETENSOR(VNAME, V)                              \
   void *convertToMLIRSparseTensor##VNAME(                                     \
       uint64_t rank, uint64_t nse, uint64_t *shape, V *values,                \
@@ -501,12 +519,6 @@
 FOREVERY_V(IMPL_CONVERTTOMLIRSPARSETENSOR)
 #undef IMPL_CONVERTTOMLIRSPARSETENSOR

-// TODO: Currently, values are copied from SparseTensorStorage to
-// SparseTensorCOO, then to the output. We may want to reduce the number
-// of copies.
-//
-// TODO: generalize beyond 64-bit indices, no dim ordering, all dimensions
-// compressed
 #define IMPL_CONVERTFROMMLIRSPARSETENSOR(VNAME, V)                            \
   void convertFromMLIRSparseTensor##VNAME(void *tensor, uint64_t *pRank,      \
                                           uint64_t *pNse, uint64_t **pShape,  \