diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -49,26 +49,32 @@ let hasVerifier = 1; } -def SparseTensor_InitOp : SparseTensor_Op<"init", [NoSideEffect]>, - Arguments<(ins Variadic:$sizes)>, +def SparseTensor_InitOp : SparseTensor_Op<"init", + [NoSideEffect, + AttrSizedOperandSegments]>, + Arguments<(ins Variadic:$sizes, Optional:$startValue)>, Results<(outs AnyTensor:$result)> { - string summary = "Materializes an unitialized sparse tensor"; + string summary = "Materializes an uninitialized sparse tensor"; string description = [{ Materializes an uninitialized sparse tensor with given shape (either static or dynamic). The operation is provided as an anchor that materializes a properly typed but uninitialized sparse tensor into the output clause of a subsequent operation that yields a sparse tensor as the result. + When the computation performs a reduction or aggregation into the uninitialized + sparse tensor, the starting value of every element is assumed to be zero unless + explicitly declared using the optional `start_value`. + Example: ```mlir - %c = sparse_tensor.init_tensor [%d1, %d2] : tensor + %c = sparse_tensor.init [%d1, %d2] start_value=%cf0[f32] : tensor %0 = linalg.matmul ins(%a, %b: tensor, tensor) outs(%c: tensor) -> tensor ``` }]; - let assemblyFormat = "`[` $sizes `]` attr-dict `:` type($result)"; + let assemblyFormat = "`[` $sizes `]` ( `start_value` `=` $startValue^ `[` type($startValue) `]` )? 
attr-dict `:` type($result)"; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -233,6 +233,11 @@ << shape[i]; } } + if (startValue()) { + Type svtype = startValue().getType(); + if (svtype != ttp.getElementType()) + return emitError("start_value must match type of tensor"); + } return success(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -473,8 +473,7 @@ // explicitly defined by the arguments to the init operator. SmallVector params; ShapedType stp = resType.cast(); - newParams(rewriter, params, op, stp, enc, Action::kEmpty, - adaptor.getOperands()); + newParams(rewriter, params, op, stp, enc, Action::kEmpty, adaptor.sizes()); rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -704,14 +704,35 @@ static Value genInsertionLoad(CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op, OpOperand *t) { Location loc = op.getLoc(); + Type tp = getElementTypeOrSelf(t->get().getType()); + InitOp init = codegen.sparseOut->get().getDefiningOp(); - // Direct lexicographic index order, tensor loads as zero. + // Direct lexicographic index order, loads yield start value (else zero). 
if (!codegen.expValues) { - Type tp = getElementTypeOrSelf(t->get().getType()); + if (init && init.startValue()) + return init.startValue(); return constantZero(builder, loc, tp); } // Load from expanded access pattern. Value index = genIndex(codegen, op, t); - return builder.create(loc, codegen.expValues, index); + // Use the provided start value unless the position is already filled. + if (init && init.startValue()) { + Value isFilled = + builder.create(loc, codegen.expFilled, index); + scf::IfOp ifStmt = + builder.create(loc, tp, isFilled, /*else=*/true); + // True branch + builder.setInsertionPointToStart(ifStmt.thenBlock()); + Value valAtIndex = + builder.create(loc, codegen.expValues, index); + builder.create(loc, valAtIndex); + // False branch + builder.setInsertionPointToStart(ifStmt.elseBlock()); + builder.create(loc, init.startValue()); + builder.setInsertionPointAfter(ifStmt); + // End if + return ifStmt.getResult(0); + } + return builder.create(loc, codegen.expValues, index); } /// Generates insertion code to implement dynamic tensor store. diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mat_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mat_reductions.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mat_reductions.mlir @@ -0,0 +1,100 @@ +// RUN: mlir-opt %s --sparse-compiler | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. 
+// +// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=2" | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +!Filename = !llvm.ptr + +#SparseMatrix = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "compressed" ] +}> + +#matmul_prodprod = { + indexing_maps = [ + affine_map<(i,j,k) -> (i,k)>, // A + affine_map<(i,j,k) -> (k,j)>, // B + affine_map<(i,j,k) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel", "reduction"], + doc = "X(i,j) *= A(i,k) * B(k,j)" +} + +// +// Integration test that lowers a kernel annotated as sparse to +// actual sparse code, initializes a matching sparse storage scheme +// from file, and runs the resulting code with the JIT compiler. +// +module { + // + // A kernel that multiplies a sparse matrix A with itself + // into a sparse matrix X using PROD instead of SUM for the reduction. + // + func.func @kernel_prod(%arga: tensor) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %cf1 = arith.constant 1.0 : f64 + %d0 = tensor.dim %arga, %c0 : tensor + %d1 = tensor.dim %arga, %c1 : tensor + %argx = sparse_tensor.init [%d0, %d1] start_value=%cf1[f64] : tensor + %0 = linalg.generic #matmul_prodprod + ins(%arga, %arga: tensor, tensor) + outs(%argx: tensor) { + ^bb(%a: f64, %b: f64, %x: f64): + %0 = arith.mulf %a, %b : f64 + %1 = arith.mulf %x, %0 : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + func.func private @getTensorFilename(index) -> (!Filename) + + // Dump a sparse matrix. 
+ func.func @dump_mat(%arg0: tensor) { + %c0 = arith.constant 0 : index + %i0 = arith.constant -1.0 : f64 + %dm = sparse_tensor.convert %arg0 : tensor to tensor + %2 = bufferization.to_memref %dm : memref + %3 = vector.transfer_read %2[%c0, %c0], %i0: memref, vector<5x5xf64> + vector.print %3 : vector<5x5xf64> + memref.dealloc %2 : memref + return + } + + // + // Main driver that reads matrix from file and calls the sparse kernel. + // + func.func @entry() { + %c0 = arith.constant 0 : index + + // Read the sparse matrix from file, construct sparse storage. + %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename) + %a = sparse_tensor.new %fileName : !Filename to tensor + + // Call kernel. + %0 = call @kernel_prod(%a) : (tensor) -> (tensor) + + // Print the result for verification. + // + // CHECK: ( ( 5.74, 0, 0, 7.84, 0 ), ( 0, 52, 0, 0, 62.5 ), ( 0, 0, 9, 0, 0 ), ( 67.24, 0, 0, 91.84, 0 ), ( 0, 270.4, 0, 0, 325 ) ) + // + call @dump_mat(%0) : (tensor) -> () + + // Release the resources. + sparse_tensor.release %a : tensor + sparse_tensor.release %0 : tensor + + return + } +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -24,8 +24,7 @@ module { func.func @redsum(%arga: tensor, - %argb: tensor) - -> tensor { + %argb: tensor) -> tensor { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %d0 = tensor.dim %arga, %c0 : tensor @@ -43,11 +42,43 @@ return %0 : tensor } - // Driver method to call and verify tensor kernel. 
- func.func @entry() { + func.func @redprod(%arga: tensor, + %argb: tensor) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %ci1 = arith.constant 1 : i32 + %d0 = tensor.dim %arga, %c0 : tensor + %d1 = tensor.dim %arga, %c1 : tensor + %xinit = sparse_tensor.init [%d0, %d1] start_value=%ci1[i32] : tensor + %0 = linalg.generic #redsum + ins(%arga, %argb: tensor, + tensor) + outs(%xinit: tensor) { + ^bb(%a: i32, %b: i32, %x: i32): + %0 = arith.muli %a, %b : i32 + %1 = arith.muli %x, %0 : i32 + linalg.yield %1 : i32 + } -> tensor + return %0 : tensor + } + + // Dump a sparse matrix. + func.func @dump_mat(%arg0: tensor) { %c0 = arith.constant 0 : index %i0 = arith.constant -1 : i32 + %0 = sparse_tensor.values %arg0 : tensor to memref + %1 = vector.transfer_read %0[%c0], %i0: memref, vector<4xi32> + vector.print %1 : vector<4xi32> + %dm = sparse_tensor.convert %arg0 : tensor to tensor + %2 = bufferization.to_memref %dm : memref + %3 = vector.transfer_read %2[%c0, %c0], %i0: memref, vector<3x3xi32> + vector.print %3 : vector<3x3xi32> + memref.dealloc %2 : memref + return + } + // Driver method to call and verify tensor kernel. + func.func @entry() { // Setup very sparse 3-d tensors. %t1 = arith.constant sparse< [ [1,1,3], [2,0,0], [2,2,1], [2,2,2], [2,2,3] ], [ 1, 2, 3, 4, 5 ] @@ -64,28 +95,26 @@ %0 = call @redsum(%st1, %st2) : (tensor, tensor) -> tensor + %1 = call @redprod(%st1, %st2) + : (tensor, + tensor) -> tensor // // Verify results. Only two entries stored in result. Correct structure. 
// // CHECK: ( 7, 69, -1, -1 ) // CHECK-NEXT: ( ( 0, 0, 0 ), ( 0, 7, 0 ), ( 0, 0, 69 ) ) + // CHECK: ( 7, 1080, -1, -1 ) + // CHECK-NEXT: ( ( 0, 0, 0 ), ( 0, 7, 0 ), ( 0, 0, 1080 ) ) // - %val = sparse_tensor.values %0 - : tensor to memref - %vv = vector.transfer_read %val[%c0], %i0: memref, vector<4xi32> - vector.print %vv : vector<4xi32> - %dm = sparse_tensor.convert %0 - : tensor to tensor - %db = bufferization.to_memref %dm : memref - %vm = vector.transfer_read %db[%c0, %c0], %i0: memref, vector<3x3xi32> - vector.print %vm : vector<3x3xi32> + call @dump_mat(%0) : (tensor) -> () + call @dump_mat(%1) : (tensor) -> () // Release the resources. sparse_tensor.release %st1 : tensor sparse_tensor.release %st2 : tensor sparse_tensor.release %0 : tensor - memref.dealloc %db : memref + sparse_tensor.release %1 : tensor return } }