diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -330,12 +330,16 @@ unsigned tensor = t->getOperandNumber(); for (unsigned i = 0; i < n; i++) if (merger.isDimLevelType(tensor, i, DimLvlType::kCompressed) || - merger.isDimLevelType(tensor, i, DimLvlType::kSingleton)) + merger.isDimLevelType(tensor, i, DimLvlType::kSingleton)) { for (unsigned j = 0; j < n; j++) if (merger.isDimLevelType(tensor, j, DimLvlType::kUndef)) { adjM[i][j] = true; inDegree[j]++; } + } else { + assert(merger.isDimLevelType(tensor, i, DimLvlType::kDense) || + merger.isDimLevelType(tensor, i, DimLvlType::kUndef)); + } } } // Topologically sort the iteration graph to determine loop order. @@ -377,6 +381,9 @@ merger.isDimLevelType(tensor, i, DimLvlType::kSingleton)) { allDense = false; break; + } else { + assert(merger.isDimLevelType(tensor, i, DimLvlType::kDense) || + merger.isDimLevelType(tensor, i, DimLvlType::kUndef)); } if (allDense) return true; @@ -537,14 +544,13 @@ } /// Local bufferization of all dense and sparse data structures. -/// This code enables testing the first prototype sparse compiler. -// TODO: replace this with a proliferated bufferization strategy static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op) { Location loc = op.getLoc(); assert(op.getNumInputsAndOutputs() == op.getNumInputs() + 1); // For every tensor, find lower and upper bound on dimensions, set the // same bounds on loop indices, and obtain dense or sparse buffer(s). + auto dynShape = {ShapedType::kDynamicSize}; SmallVector args; for (OpOperand *t : op.getInputAndOutputOperands()) { unsigned tensor = t->getOperandNumber(); @@ -558,21 +564,28 @@ if (a.getKind() != AffineExprKind::DimId) continue; // compound unsigned idx = a.cast().getPosition(); - // Handle sparse storage schemes. + // Handle the different storage schemes. if (merger.isDimLevelType(tensor, idx, DimLvlType::kCompressed)) { - auto dynShape = {ShapedType::kDynamicSize}; + // Compressed dimension, fetch pointer and indices. auto ptrTp = MemRefType::get(dynShape, getPointerOverheadType(builder, enc)); auto indTp = MemRefType::get(dynShape, getIndexOverheadType(builder, enc)); auto dim = builder.getIndexAttr(d); - // Generate sparse primitives to obtains pointer and indices. codegen.pointers[tensor][idx] = builder.create(loc, ptrTp, t->get(), dim); codegen.indices[tensor][idx] = builder.create(loc, indTp, t->get(), dim); } else if (merger.isDimLevelType(tensor, idx, DimLvlType::kSingleton)) { - llvm_unreachable("TODO: not implemented yet"); + // Singleton dimension, fetch indices. + auto indTp = + MemRefType::get(dynShape, getIndexOverheadType(builder, enc)); + auto dim = builder.getIndexAttr(d); + codegen.indices[tensor][idx] = + builder.create(loc, indTp, t->get(), dim); + } else { + // Dense dimension, nothing to fetch. + assert(merger.isDimLevelType(tensor, idx, DimLvlType::kDense)); } // Find upper bound in current dimension. unsigned p = perm(enc, d); @@ -598,14 +611,12 @@ } else if (t == codegen.sparseOut) { // True sparse output needs a lexIdx array. 
Value rank = constantIndex(builder, loc, op.getRank(t)); - auto dynShape = {ShapedType::kDynamicSize}; auto memTp = MemRefType::get(dynShape, builder.getIndexType()); codegen.lexIdx = builder.create(loc, memTp, rank); codegen.lexVal = builder.create( loc, MemRefType::get({}, elementType)); } else { // Annotated sparse tensors. - auto dynShape = {ShapedType::kDynamicSize}; auto sparseTp = MemRefType::get(dynShape, elementType); codegen.buffers[tensor] = builder.create(loc, sparseTp, t->get()); @@ -1175,29 +1186,45 @@ // Initialize sparse positions. for (unsigned b = 0, be = inits.size(); b < be; b++) { - if (inits[b]) { - unsigned tensor = merger.tensor(b); - assert(idx == merger.index(b)); - if (merger.isDimLevelType(b, DimLvlType::kCompressed)) { - // Initialize sparse index. - unsigned pat = at; - for (; pat != 0; pat--) { - if (codegen.pidxs[tensor][topSort[pat - 1]]) - break; - } - Value ptr = codegen.pointers[tensor][idx]; - Value one = constantIndex(builder, loc, 1); - Value p0 = (pat == 0) ? constantIndex(builder, loc, 0) - : codegen.pidxs[tensor][topSort[pat - 1]]; - codegen.pidxs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p0); - Value p1 = builder.create(loc, p0, one); - codegen.highs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p1); - } else if (merger.isDimLevelType(b, DimLvlType::kSingleton)) { - llvm_unreachable("TODO: not implemented yet"); - } else { - // Dense index still in play. - needsUniv = true; + if (!inits[b]) + continue; + unsigned tensor = merger.tensor(b); + assert(idx == merger.index(b)); + if (merger.isDimLevelType(b, DimLvlType::kCompressed)) { + // Initialize sparse index that will implement the iteration: + // for pidx_idx = pointers(pidx_idx-1), pointers(1+pidx_idx-1) + unsigned pat = at; + for (; pat != 0; pat--) { + if (codegen.pidxs[tensor][topSort[pat - 1]]) + break; } + Value ptr = codegen.pointers[tensor][idx]; + Value one = constantIndex(builder, loc, 1); + Value p0 = (pat == 0) ? constantIndex(builder, loc, 0) + : codegen.pidxs[tensor][topSort[pat - 1]]; + codegen.pidxs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p0); + Value p1 = builder.create(loc, p0, one); + codegen.highs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p1); + } else if (merger.isDimLevelType(b, DimLvlType::kSingleton)) { + // Initialize sparse index that will implement the "iteration": + // for pidx_idx = pidx_idx-1, 1+pidx_idx-1 + // We rely on subsequent loop unrolling to get rid of the loop + // if it is not involved in co-iteration with anything else. + unsigned pat = at; + for (; pat != 0; pat--) { + if (codegen.pidxs[tensor][topSort[pat - 1]]) + break; + } + Value one = constantIndex(builder, loc, 1); + Value p0 = (pat == 0) ? constantIndex(builder, loc, 0) + : codegen.pidxs[tensor][topSort[pat - 1]]; + codegen.pidxs[tensor][idx] = p0; + codegen.highs[tensor][idx] = builder.create(loc, p0, one); + } else { + assert(merger.isDimLevelType(b, DimLvlType::kDense) || + merger.isDimLevelType(b, DimLvlType::kUndef)); + // Dense index still in play. 
+ needsUniv = true; } } @@ -1284,15 +1311,13 @@ assert(idx == merger.index(fb)); auto iteratorTypes = op.iterator_types().getValue(); bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]); - bool isSparse = merger.isDimLevelType(fb, DimLvlType::kCompressed); + bool isSparse = merger.isDimLevelType(fb, DimLvlType::kCompressed) || + merger.isDimLevelType(fb, DimLvlType::kSingleton); bool isVector = isVectorFor(codegen, isInner, isReduction, isSparse) && denseUnitStrides(merger, op, idx); bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse, isVector); - assert(!merger.isDimLevelType(fb, DimLvlType::kSingleton) && - "TODO: implement"); - // Prepare vector length. if (isVector) codegen.curVecLength = codegen.options.vectorLength; @@ -1360,11 +1385,17 @@ // Construct the while-loop with a parameter for each index. Type indexType = builder.getIndexType(); for (unsigned b = 0, be = indices.size(); b < be; b++) { - if (indices[b] && merger.isDimLevelType(b, DimLvlType::kCompressed)) { + if (!indices[b]) + continue; + if (merger.isDimLevelType(b, DimLvlType::kCompressed) || + merger.isDimLevelType(b, DimLvlType::kSingleton)) { unsigned tensor = merger.tensor(b); assert(idx == merger.index(b)); types.push_back(indexType); operands.push_back(codegen.pidxs[tensor][idx]); + } else { + assert(merger.isDimLevelType(b, DimLvlType::kDense) || + merger.isDimLevelType(b, DimLvlType::kUndef)); } } if (codegen.redVal) { @@ -1393,8 +1424,10 @@ Value cond; unsigned o = 0; for (unsigned b = 0, be = indices.size(); b < be; b++) { - // TODO: singleton - if (indices[b] && merger.isDimLevelType(b, DimLvlType::kCompressed)) { + if (!indices[b]) + continue; + if (merger.isDimLevelType(b, DimLvlType::kCompressed) || + merger.isDimLevelType(b, DimLvlType::kSingleton)) { unsigned tensor = merger.tensor(b); assert(idx == merger.index(b)); Value op1 = before->getArgument(o); @@ -1403,6 +1436,9 @@ op1, op2); cond = cond ? builder.create(loc, cond, opc) : opc; codegen.pidxs[tensor][idx] = after->getArgument(o++); + } else { + assert(merger.isDimLevelType(b, DimLvlType::kDense) || + merger.isDimLevelType(b, DimLvlType::kUndef)); } } if (codegen.redVal) @@ -1442,8 +1478,10 @@ // Initialize sparse indices. 
Value min; for (unsigned b = 0, be = locals.size(); b < be; b++) { - // TODO: singleton - if (locals[b] && merger.isDimLevelType(b, DimLvlType::kCompressed)) { + if (!locals[b]) + continue; + if (merger.isDimLevelType(b, DimLvlType::kCompressed) || + merger.isDimLevelType(b, DimLvlType::kSingleton)) { unsigned tensor = merger.tensor(b); assert(idx == merger.index(b)); Value ptr = codegen.indices[tensor][idx]; @@ -1459,6 +1497,9 @@ min = load; } } + } else { + assert(merger.isDimLevelType(b, DimLvlType::kDense) || + merger.isDimLevelType(b, DimLvlType::kUndef)); } } @@ -1531,8 +1572,10 @@ SmallVector operands; Value one = constantIndex(builder, loc, 1); for (unsigned b = 0, be = induction.size(); b < be; b++) { - // TODO: singleton - if (induction[b] && merger.isDimLevelType(b, DimLvlType::kCompressed)) { + if (!induction[b]) + continue; + if (merger.isDimLevelType(b, DimLvlType::kCompressed) || + merger.isDimLevelType(b, DimLvlType::kSingleton)) { unsigned tensor = merger.tensor(b); assert(idx == merger.index(b)); Value op1 = codegen.idxs[tensor][idx]; @@ -1543,6 +1586,9 @@ Value add = builder.create(loc, op3, one); operands.push_back(builder.create(loc, cmp, add, op3)); codegen.pidxs[tensor][idx] = whileOp->getResult(o++); + } else { + assert(merger.isDimLevelType(b, DimLvlType::kDense) || + merger.isDimLevelType(b, DimLvlType::kUndef)); } } if (codegen.redVal) { @@ -1592,21 +1638,23 @@ SmallVector types; Value cond; for (unsigned b = 0, be = conditions.size(); b < be; b++) { - if (conditions[b]) { - unsigned tensor = merger.tensor(b); - assert(idx == merger.index(b)); - Value clause; - // TODO: singleton - if (merger.isDimLevelType(b, DimLvlType::kCompressed)) { - Value op1 = codegen.idxs[tensor][idx]; - Value op2 = codegen.loops[idx]; - clause = builder.create(loc, arith::CmpIPredicate::eq, - op1, op2); - } else { - clause = constantI1(builder, loc, true); - } - cond = cond ? builder.create(loc, cond, clause) : clause; + if (!conditions[b]) + continue; + unsigned tensor = merger.tensor(b); + assert(idx == merger.index(b)); + Value clause; + if (merger.isDimLevelType(b, DimLvlType::kCompressed) || + merger.isDimLevelType(b, DimLvlType::kSingleton)) { + Value op1 = codegen.idxs[tensor][idx]; + Value op2 = codegen.loops[idx]; + clause = builder.create(loc, arith::CmpIPredicate::eq, op1, + op2); + } else { + assert(merger.isDimLevelType(b, DimLvlType::kDense) || + merger.isDimLevelType(b, DimLvlType::kUndef)); + clause = constantI1(builder, loc, true); } + cond = cond ? builder.create(loc, cond, clause) : clause; } if (codegen.redVal) types.push_back(codegen.redVal.getType()); diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -273,8 +273,7 @@ assert(rank > 0 && "Trivial shape is unsupported"); for (uint64_t r = 0; r < rank; r++) { assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage"); - assert((dimTypes[r] == DimLevelType::kDense || - dimTypes[r] == DimLevelType::kCompressed) && + assert((isDenseDim(r) || isCompressedDim(r) || isSingletonDim(r)) && "Unsupported DimLevelType"); } // Construct the "reverse" (i.e., inverse) permutation. @@ -303,10 +302,66 @@ /// Getter for the dimension-types array, in storage-order. const std::vector &getDimTypes() const { return dimTypes; } + /// Safely check if the (storage-order) dimension uses dense storage. 
+ bool isDenseDim(uint64_t d) const { + assert(d < getRank()); + return dimTypes[d] == DimLevelType::kDense; + } + /// Safely check if the (storage-order) dimension uses compressed storage. bool isCompressedDim(uint64_t d) const { assert(d < getRank()); - return (dimTypes[d] == DimLevelType::kCompressed); + switch (dimTypes[d]) { + case DimLevelType::kCompressed: + case DimLevelType::kCompressedNu: + case DimLevelType::kCompressedNo: + case DimLevelType::kCompressedNuNo: + return true; + default: + return false; + } + } + + /// Safely check if the (storage-order) dimension uses singleton storage. + bool isSingletonDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kSingleton: + case DimLevelType::kSingletonNu: + case DimLevelType::kSingletonNo: + case DimLevelType::kSingletonNuNo: + return true; + default: + return false; + } + } + + /// Safely check if the (storage-order) dimension is ordered. + bool isOrderedDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kCompressedNo: + case DimLevelType::kCompressedNuNo: + case DimLevelType::kSingletonNo: + case DimLevelType::kSingletonNuNo: + return false; + default: + return true; + } + } + + /// Safely check if the (storage-order) dimension is unique. + bool isUniqueDim(uint64_t d) const { + assert(d < getRank()); + switch (dimTypes[d]) { + case DimLevelType::kCompressedNu: + case DimLevelType::kCompressedNuNo: + case DimLevelType::kSingletonNu: + case DimLevelType::kSingletonNuNo: + return false; + default: + return true; + } } /// Allocate a new enumerator. @@ -422,7 +477,12 @@ indices[r].reserve(sz); sz = 1; allDense = false; + } else if (isSingletonDim(r)) { + indices[r].reserve(sz); + sz = 1; + allDense = false; } else { // Dense dimension. + assert(isDenseDim(r)); sz = checkedMul(sz, getDimSizes()[r]); } } @@ -612,11 +672,12 @@ /// where `full` is the number of "entries" already written to `values` /// for this segment (aka one after the highest index previously appended). void appendIndex(uint64_t d, uint64_t full, uint64_t i) { - if (isCompressedDim(d)) { + if (isCompressedDim(d) || isSingletonDim(d)) { assert(i <= std::numeric_limits::max() && "Index value is too large for the I-type"); indices[d].push_back(static_cast(i)); } else { // Dense dimension. + assert(isDenseDim(d)); assert(i >= full && "Index was already filled"); if (i == full) return; // Short-circuit, since it'll be a nop. @@ -681,7 +742,8 @@ // Find segment in interval with same index elements in this dimension. uint64_t i = elements[lo].indices[d]; uint64_t seg = lo + 1; - while (seg < hi && elements[seg].indices[d] == i) + bool merge = isUniqueDim(d); + while (merge && seg < hi && elements[seg].indices[d] == i) seg++; // Handle segment in interval for sparse or dense dimension. appendIndex(d, full, i); @@ -700,7 +762,10 @@ return; // Short-circuit, since it'll be a nop. if (isCompressedDim(d)) { appendPointer(d, indices[d].size(), count); + } else if (isSingletonDim(d)) { + return; } else { // Dense dimension. + assert(isDenseDim(d)); const uint64_t sz = getDimSizes()[d]; assert(sz >= full && "Segment is overfull"); count = checkedMul(count, sz - full); @@ -882,7 +947,10 @@ cursorReordD = static_cast(indicesD[pos]); forallElements(yield, pos, d + 1); } + } else if (src.isSingletonDim(d)) { + FATAL("unsupported dimension level type"); } else { // Dense dimension. 
+ assert(src.isDenseDim(d)); const uint64_t sz = src.getDimSizes()[d]; const uint64_t pstart = parentPos * sz; uint64_t &cursorReordD = this->cursor[this->reord[d]]; @@ -1069,7 +1137,9 @@ pointers[r][parentPos]++; writeIndex(r, currentPos, ind[r]); parentPos = currentPos; + } else if (isSingletonDim(r)) { } else { // Dense dimension. + assert(isDenseDim(r)); parentPos = parentPos * getDimSizes()[r] + ind[r]; } parentSz = assembledSize(parentSz, r); diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir @@ -0,0 +1,96 @@ +// RUN: mlir-opt %s -sparsification | FileCheck %s + +#SortedCOO = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed-nu", "singleton" ] +}> + +#trait_scale = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i,j) = X(i,j) * 2.0" +} + +#trait_matvec = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> (j)>, // b + affine_map<(i,j) -> (i)> // x (out) + ], + iterator_types = ["parallel","reduction"], + doc = "x(i) += A(i,j) * b(j)" +} + +// +// Two kernels that operate on SortedCOO format. +// + +// CHECK-LABEL: func.func @sparse_scale( +// CHECK-SAME: %[[VAL_0:.*]]: tensor>) -> tensor> { +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref +// CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_1]]] : memref +// CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref +// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_2]] { +// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_10:.*]] = arith.mulf %[[VAL_9]], %[[VAL_3]] : f32 +// CHECK: memref.store %[[VAL_10]], %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref +// CHECK: } +// CHECK: %[[VAL_11:.*]] = sparse_tensor.load %[[VAL_0]] : tensor> +// CHECK: return %[[VAL_11]] : tensor> +// CHECK: } +func.func @sparse_scale(%argx: tensor) -> tensor { + %c = arith.constant 2.0 : f32 + %0 = linalg.generic #trait_scale + outs(%argx: tensor) { + ^bb(%x: f32): + %1 = arith.mulf %x, %c : f32 + linalg.yield %1 : f32 + } -> tensor + return %0 : tensor +} + +// CHECK-LABEL: func.func @matvec( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<64xf64>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref 
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref +// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> +// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf64> +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<64xf64> +// CHECK: %[[VAL_19:.*]] = arith.mulf %[[VAL_17]], %[[VAL_18]] : f64 +// CHECK: %[[VAL_20:.*]] = arith.addf %[[VAL_15]], %[[VAL_19]] : f64 +// CHECK: memref.store %[[VAL_20]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf64> +// CHECK: } +// CHECK: %[[VAL_21:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf64> +// CHECK: return %[[VAL_21]] : tensor<32xf64> +// CHECK: } +func.func @matvec(%arga: tensor<32x64xf64, #SortedCOO>, + %argb: tensor<64xf64>, + %argx: tensor<32xf64>) -> tensor<32xf64> { + %0 = linalg.generic #trait_matvec + ins(%arga, %argb : tensor<32x64xf64, #SortedCOO>, tensor<64xf64>) + outs(%argx: tensor<32xf64>) { + ^bb(%A: f64, %b: f64, %x: f64): + %0 = arith.mulf %A, %b : f64 + %1 = arith.addf %x, %0 : f64 + linalg.yield %1 : f64 + } -> tensor<32xf64> + return %0 : tensor<32xf64> +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir @@ -0,0 +1,230 @@ +// RUN: mlir-opt %s --sparse-compiler | \ +// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// RUN: TENSOR1="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +!Filename = !llvm.ptr + +#SortedCOO = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed-nu", "singleton" ] +}> + +#SortedCOOPermuted = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed-nu", "singleton" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +#SortedCOO3D = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed-nu", "singleton-nu", "singleton" ] +}> + +#SortedCOO3DPermuted = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed-nu", "singleton-nu", "singleton" ], + dimOrdering = affine_map<(i,j,k) -> (k,i,j)> +}> + +#trait_scale = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i,j) = X(i,j) * 2.0" +} + +// +// Tests reading in matrix/tensor from file into Sorted COO formats +// as well as applying various operations to this format. +// +module { + + func.func private @getTensorFilename(index) -> (!Filename) + + // + // A kernel that scales a sparse matrix A by a factor of 2.0. 
+ // + func.func @sparse_scale(%argx: tensor) + -> tensor { + %c = arith.constant 2.0 : f64 + %0 = linalg.generic #trait_scale + outs(%argx: tensor) { + ^bb(%x: f64): + %1 = arith.mulf %x, %c : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + func.func @dumpi(%arg0: memref) { + %c0 = arith.constant 0 : index + %v = vector.transfer_read %arg0[%c0], %c0: memref, vector<20xindex> + vector.print %v : vector<20xindex> + return + } + + func.func @dumpf(%arg0: memref) { + %c0 = arith.constant 0 : index + %nan = arith.constant 0x7FF0000001000000 : f64 + %v = vector.transfer_read %arg0[%c0], %nan: memref, vector<20xf64> + vector.print %v : vector<20xf64> + return + } + + func.func @entry() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %fileName0 = call @getTensorFilename(%c0) : (index) -> (!Filename) + %fileName1 = call @getTensorFilename(%c1) : (index) -> (!Filename) + + // Read the sparse tensors from file, construct sparse storage. + %0 = sparse_tensor.new %fileName0 : !Filename to tensor + %1 = sparse_tensor.new %fileName0 : !Filename to tensor + %2 = sparse_tensor.new %fileName1 : !Filename to tensor + %3 = sparse_tensor.new %fileName1 : !Filename to tensor + + // Conversion from literal. + %m = arith.constant sparse< + [ [0,0], [1,3], [2,0], [2,3], [3,1], [4,1] ], + [6.0, 5.0, 4.0, 3.0, 2.0, 11.0 ] + > : tensor<5x4xf64> + %4 = sparse_tensor.convert %m : tensor<5x4xf64> to tensor + + // + // CHECK: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255, 0, 0, 0 ) + // CHECK-NEXT: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, nan, nan, nan ) + // + %p0 = sparse_tensor.pointers %0 { dimension = 0 : index } + : tensor to memref + %i00 = sparse_tensor.indices %0 { dimension = 0 : index } + : tensor to memref + %i01 = sparse_tensor.indices %0 { dimension = 1 : index } + : tensor to memref + %v0 = sparse_tensor.values %0 + : tensor to memref + call @dumpi(%p0) : (memref) -> () + call @dumpi(%i00) : (memref) -> () + call @dumpi(%i01) : (memref) -> () + call @dumpf(%v0) : (memref) -> () + + // + // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 0, 1, 1, 2, 3, 98, 126, 126, 127, 127, 128, 249, 253, 253, 254, 255, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 3, 1, 3, 2, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3, 0, 3, 0, 0, 0 ) + // CHECK-NEXT: ( -1, 8, -5, -9, -7, 10, -11, 2, 12, -3, -13, 14, -15, 6, 16, 4, -17, nan, nan, nan ) + // + %p1 = sparse_tensor.pointers %1 { dimension = 0 : index } + : tensor to memref + %i10 = sparse_tensor.indices %1 { dimension = 0 : index } + : tensor to memref + %i11 = sparse_tensor.indices %1 { dimension = 1 : index } + : tensor to memref + %v1 = sparse_tensor.values %1 + : tensor to memref + call @dumpi(%p1) : (memref) -> () + call @dumpi(%i10) : (memref) -> () + call @dumpi(%i11) : (memref) -> () + call @dumpf(%v1) : (memref) -> () + + // + // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 ) + // CHECK-NEXT: ( 3, 63, 11, 100, 66, 61, 13, 43, 77, 10, 46, 61, 53, 3, 75, 22, 18, nan, nan, nan ) + // + %p2 = sparse_tensor.pointers %2 { 
dimension = 0 : index } + : tensor to memref + %i20 = sparse_tensor.indices %2 { dimension = 0 : index } + : tensor to memref + %i21 = sparse_tensor.indices %2 { dimension = 1 : index } + : tensor to memref + %i22 = sparse_tensor.indices %2 { dimension = 2 : index } + : tensor to memref + %v2 = sparse_tensor.values %2 + : tensor to memref + call @dumpi(%p2) : (memref) -> () + call @dumpi(%i20) : (memref) -> () + call @dumpi(%i21) : (memref) -> () + call @dumpi(%i21) : (memref) -> () + call @dumpf(%v2) : (memref) -> () + + // + // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 ) + // CHECK-NEXT: ( 66, 77, 61, 11, 61, 53, 22, 3, 100, 13, 10, 3, 18, 63, 43, 46, 75, nan, nan, nan ) + // + %p3 = sparse_tensor.pointers %3 { dimension = 0 : index } + : tensor to memref + %i30 = sparse_tensor.indices %3 { dimension = 0 : index } + : tensor to memref + %i31 = sparse_tensor.indices %3 { dimension = 1 : index } + : tensor to memref + %i32 = sparse_tensor.indices %3 { dimension = 2 : index } + : tensor to memref + %v3 = sparse_tensor.values %3 + : tensor to memref + call @dumpi(%p3) : (memref) -> () + call @dumpi(%i30) : (memref) -> () + call @dumpi(%i31) : (memref) -> () + call @dumpi(%i31) : (memref) -> () + call @dumpf(%v3) : (memref) -> () + + // + // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 6, 5, 4, 3, 2, 11, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan ) + // + %p4 = sparse_tensor.pointers %4 { dimension = 0 : index } + : tensor to memref + %i40 = sparse_tensor.indices %4 { dimension = 0 : index } + : tensor to memref + %i41 = sparse_tensor.indices %4 { dimension = 1 : index } + : tensor to memref + %v4 = sparse_tensor.values %4 + : tensor to memref + call @dumpi(%p4) : (memref) -> () + call @dumpi(%i40) : (memref) -> () + call @dumpi(%i41) : (memref) -> () + call @dumpf(%v4) : (memref) -> () + + // And last but not least, an actual operation applied to COO. + // Note that this performs the operation "in place". + %5 = call @sparse_scale(%4) : (tensor) -> tensor + + // + // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) + // CHECK-NEXT: ( 12, 10, 8, 6, 4, 22, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan ) + // + %p5 = sparse_tensor.pointers %5 { dimension = 0 : index } + : tensor to memref + %i50 = sparse_tensor.indices %5 { dimension = 0 : index } + : tensor to memref + %i51 = sparse_tensor.indices %5 { dimension = 1 : index } + : tensor to memref + %v5 = sparse_tensor.values %5 + : tensor to memref + call @dumpi(%p5) : (memref) -> () + call @dumpi(%i50) : (memref) -> () + call @dumpi(%i51) : (memref) -> () + call @dumpf(%v5) : (memref) -> () + + // Release the resources. 
+    bufferization.dealloc_tensor %0 : tensor<?x?xf64, #SortedCOO>
+    bufferization.dealloc_tensor %1 : tensor<?x?xf64, #SortedCOOPermuted>
+    bufferization.dealloc_tensor %2 : tensor<?x?x?xf64, #SortedCOO3D>
+    bufferization.dealloc_tensor %3 : tensor<?x?x?xf64, #SortedCOO3DPermuted>
+    bufferization.dealloc_tensor %4 : tensor<?x?xf64, #SortedCOO>
+
+    return
+  }
+}
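
A note on the storage layout exercised by the integration test above, with a small illustrative sketch. In the sorted COO encoding ("compressed-nu" followed by "singleton"), the outer level is compressed but marked not-unique, so the runtime's segment-merge loop (guarded by the new `merge = isUniqueDim(d)` check in the element-sorting code) keeps one index entry per nonzero, and the inner singleton level contributes a second, parallel index array with no pointers array of its own. The standalone C++ sketch below is not the runtime's `SparseTensorStorage` code and uses made-up names; it simply builds that layout for the 5x4 constant matrix converted to `%4` in the test, and the printed arrays correspond to the CHECK lines shown for `%4`.

#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative COO element; the real runtime uses its own Element<V> type.
struct Elem { uint64_t i, j; double v; };

int main() {
  // Nonzeros of the 5x4 constant matrix from the integration test,
  // already sorted lexicographically by (i, j).
  std::vector<Elem> elems = {{0, 0, 6.0}, {1, 3, 5.0}, {2, 0, 4.0},
                             {2, 3, 3.0}, {3, 1, 2.0}, {4, 1, 11.0}};

  // Sorted COO storage: one pointers array for the compressed-nu level,
  // two parallel index arrays (one per level), and the values.
  std::vector<uint64_t> pointers0 = {0, elems.size()}; // spans all nonzeros
  std::vector<uint64_t> indices0, indices1;
  std::vector<double> values;
  for (const Elem &e : elems) {
    indices0.push_back(e.i); // non-unique level: repeated i values are kept
    indices1.push_back(e.j); // singleton level: exactly one j per position
    values.push_back(e.v);
  }

  // Corresponds to the CHECK lines for %4 above:
  //   pointers(0) = ( 0, 6 )
  //   indices(0)  = ( 0, 1, 2, 2, 3, 4 )
  //   indices(1)  = ( 0, 3, 0, 3, 1, 1 )
  //   values      = ( 6, 5, 4, 3, 2, 11 )
  for (uint64_t p : pointers0) std::cout << p << " ";
  std::cout << "\n";
  for (uint64_t i : indices0) std::cout << i << " ";
  std::cout << "\n";
  for (uint64_t j : indices1) std::cout << j << " ";
  std::cout << "\n";
  for (double v : values) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}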
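
On the codegen side, the matvec CHECK lines in sorted_coo.mlir show the loop structure the sparsifier now emits for this format: a single scf.for over the pointer range of the compressed-nu level, while the singleton level is initialized (in genInit) to iterate from the current position to that position plus one, so its loop folds away and both index arrays are simply loaded at the same position. The following plain C++ rendering of that structure is a sketch under assumed array names (not the generated IR's SSA values):

#include <cstdint>
#include <vector>

// Sketch of the scalar loop structure generated for x(i) += A(i,j) * b(j)
// over a SortedCOO matrix; compare with the matvec CHECK lines above.
void matvecSortedCOO(const std::vector<uint64_t> &pointers0, // size 2: {0, nnz}
                     const std::vector<uint64_t> &indices0,  // row per nonzero
                     const std::vector<uint64_t> &indices1,  // col per nonzero
                     const std::vector<double> &values,
                     const std::vector<double> &b, std::vector<double> &x) {
  // Outer compressed-nu level: loop bounds come from the pointers array.
  for (uint64_t p = pointers0[0]; p < pointers0[1]; ++p) {
    // Inner singleton level: its "loop" runs from p to p + 1, so after
    // canonicalization both coordinates are read at the same position p.
    uint64_t i = indices0[p];
    uint64_t j = indices1[p];
    x[i] += values[p] * b[j];
  }
}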