diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -1166,10 +1166,15 @@
     Type idxType = rewriter.getIndexType();
     // All initialization should be done on entry of the loop nest.
     rewriter.setInsertionPointAfter(op.getTensor().getDefiningOp());
-    // Determine the size for access expansion.
+    // Determine the size for access expansion (always the innermost stored
+    // dimension size, but we need to translate it back to the original
+    // dimension since the dim size utility applies dimension ordering).
     auto enc = getSparseTensorEncoding(srcType);
     Value src = adaptor.getOperands()[0];
-    Value sz = genDimSizeCall(rewriter, loc, enc, src, srcType.getRank() - 1);
+    unsigned innerDim = srcType.getRank() - 1;
+    if (AffineMap p = enc.getDimOrdering())
+      innerDim = p.getDimPosition(innerDim);
+    Value sz = genDimSizeCall(rewriter, loc, enc, src, innerDim);
     // Allocate temporary buffers for values, filled-switch, and indices.
     // We do not use stack buffers for this, since the expanded size may
     // be rather large (as it envelops a single expanded dense dimension).
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -494,7 +494,9 @@
 }
 
 // CHECK-LABEL: func @sparse_expansion()
-// CHECK: %[[S:.*]] = call @sparseDimSize
+// CHECK-DAG: %[[C:.*]] = arith.constant 1 : index
+// CHECK: %[[N:.*]] = call @newSparseTensor
+// CHECK: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]]) : (!llvm.ptr<i8>, index) -> index
 // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
 // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
 // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
@@ -502,9 +504,9 @@
 // CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
 // CHECK: return %[[C]] : memref<?xindex>
 func.func @sparse_expansion() -> memref<?xindex> {
-  %0 = bufferization.alloc_tensor() : tensor<8x8xf64, #SparseMatrix>
+  %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #SparseMatrix>
   %values, %filled, %added, %count = sparse_tensor.expand %0
-    : tensor<8x8xf64, #SparseMatrix> to memref<?xf64>, memref<?xi1>, memref<?xindex>, index
+    : tensor<4x8xf64, #SparseMatrix> to memref<?xf64>, memref<?xi1>, memref<?xindex>, index
   return %added : memref<?xindex>
 }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
@@ -1,8 +1,21 @@
-// RUN: mlir-opt %s -sparsification | \
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN:             --linalg-fuse-elementwise-ops \
+// RUN:             --sparsification | \
 // RUN:   FileCheck %s --check-prefix=CHECK-SPARSE
-// RUN: mlir-opt %s -sparsification -sparse-tensor-conversion | \
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN:             --linalg-fuse-elementwise-ops \
+// RUN:             --sparsification --sparse-tensor-conversion --cse | \
 // RUN:   FileCheck %s --check-prefix=CHECK-CONVERT
 
+#CSR = #sparse_tensor.encoding<{
+  dimLevelType = [ "dense", "compressed" ]
+}>
+
+#CSC = #sparse_tensor.encoding<{
+  dimLevelType = [ "dense", "compressed" ],
+  dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
 #DCSC = #sparse_tensor.encoding<{
   dimLevelType = [ "compressed", "compressed" ],
   dimOrdering = affine_map<(i,j) -> (j,i)>
@@ -24,22 +37,28 @@
 //
 // CHECK-SPARSE-LABEL: func @kernel(
 // CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
-// CHECK-SPARSE: scf.for
-// CHECK-SPARSE: scf.for
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: }
+// CHECK-SPARSE: }
 // CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
 // CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
 // CHECK-SPARSE: return %[[RET]]
 //
 // CHECK-CONVERT-LABEL: func @kernel(
+// CHECK-CONVERT: %[[C:.*]] = arith.constant 0 : index
 // CHECK-CONVERT: %{{.*}} = call @sparseDimSize
-// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize
+// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]])
 // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
 // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
 // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
 // CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
 // CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
-// CHECK-CONVERT: scf.for
-// CHECK-CONVERT: scf.for
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: }
+// CHECK-CONVERT: }
 // CHECK-CONVERT: call @expInsertF64
 // CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
 // CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
@@ -59,3 +78,99 @@
   } -> tensor<?xf64, #SV>
   return %0 : tensor<?xf64, #SV>
 }
+
+//
+// CHECK-SPARSE-LABEL: func @matmul1(
+// CHECK-SPARSE-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-SPARSE-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-SPARSE-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-SPARSE: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {
+// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: }
+// CHECK-SPARSE: }
+// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
+// CHECK-SPARSE: }
+// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
+// CHECK-SPARSE: return %[[RET]]
+//
+// CHECK-CONVERT-LABEL: func @matmul1(
+// CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-CONVERT-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C1]])
+// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
+// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
+// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
+// CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: }
+// CHECK-CONVERT: }
+// CHECK-CONVERT: call @expInsertF64
+// CHECK-CONVERT: }
+// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
+// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
+// CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
+// CHECK-CONVERT: call @endInsert
+//
+func.func @matmul1(%A: tensor<8x2xf64, #CSR>,
+                   %B: tensor<2x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> {
+  %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR>
+  %D = linalg.matmul
+    ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>)
+    outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR>
+  return %D: tensor<8x4xf64, #CSR>
+}
+
+//
+// CHECK-SPARSE-LABEL: func @matmul2(
+// CHECK-SPARSE-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-SPARSE-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-SPARSE-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-SPARSE: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {
+// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: }
+// CHECK-SPARSE: }
+// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
+// CHECK-SPARSE: }
+// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
+// CHECK-SPARSE: return %[[RET]]
+//
+// CHECK-CONVERT-LABEL: func @matmul2(
+// CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-CONVERT-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C1]])
+// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
+// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
+// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
+// CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: }
+// CHECK-CONVERT: }
+// CHECK-CONVERT: call @expInsertF64
+// CHECK-CONVERT: }
+// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
+// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
+// CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
+// CHECK-CONVERT: call @endInsert
+//
+func.func @matmul2(%A: tensor<8x2xf64, #CSC>,
+                   %B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
+  %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
+  %D = linalg.matmul
+    ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
+    outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+  return %D: tensor<8x4xf64, #CSC>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -0,0 +1,79 @@
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#CSC = #sparse_tensor.encoding<{
+  dimLevelType = [ "dense", "compressed" ],
+  dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+module {
+  //
+  // Column-wise storage forces the ijk loop to permute into jki,
+  // so that access pattern expansion (workspace) needs to be
+  // done along the dimension with size 8.
+  //
+  func.func @matmul(%A: tensor<8x2xf64, #CSC>,
+                    %B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
+    %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
+    %D = linalg.matmul
+      ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
+      outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+    return %D: tensor<8x4xf64, #CSC>
+  }
+
+  //
+  // Main driver.
+  //
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %d1 = arith.constant -1.0 : f64
+
+    // Initialize various dense matrices for stress testing.
+    %da = arith.constant dense<[
+        [ 1.1, 2.1 ],
+        [ 1.2, 2.2 ],
+        [ 1.3, 2.3 ],
+        [ 1.4, 2.4 ],
+        [ 1.5, 2.5 ],
+        [ 1.6, 2.6 ],
+        [ 1.7, 2.7 ],
+        [ 1.8, 2.8 ]
+    ]> : tensor<8x2xf64>
+    %db = arith.constant dense<[
+        [ 10.1, 11.1, 12.1, 13.1 ],
+        [ 10.2, 11.2, 12.2, 13.2 ]
+    ]> : tensor<2x4xf64>
+
+    // Convert all these matrices to sparse format.
+    %x1 = sparse_tensor.convert %da : tensor<8x2xf64> to tensor<8x2xf64, #CSC>
+    %x2 = sparse_tensor.convert %db : tensor<2x4xf64> to tensor<2x4xf64, #CSC>
+
+    // Call the kernel with the sparse matrices.
+    %x3 = call @matmul(%x1, %x2)
+       : (tensor<8x2xf64, #CSC>,
+          tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+
+    //
+    // CHECK:      ( ( 32.53, 35.73, 38.93, 42.13 ),
+    // CHECK-SAME:   ( 34.56, 37.96, 41.36, 44.76 ),
+    // CHECK-SAME:   ( 36.59, 40.19, 43.79, 47.39 ),
+    // CHECK-SAME:   ( 38.62, 42.42, 46.22, 50.02 ),
+    // CHECK-SAME:   ( 40.65, 44.65, 48.65, 52.65 ),
+    // CHECK-SAME:   ( 42.68, 46.88, 51.08, 55.28 ),
+    // CHECK-SAME:   ( 44.71, 49.11, 53.51, 57.91 ),
+    // CHECK-SAME:   ( 46.74, 51.34, 55.94, 60.54 ) )
+    //
+    %xc = sparse_tensor.convert %x3 : tensor<8x4xf64, #CSC> to tensor<8x4xf64>
+    %xv = vector.transfer_read %xc[%c0, %c0], %d1 : tensor<8x4xf64>, vector<8x4xf64>
+    vector.print %xv : vector<8x4xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %x1 : tensor<8x2xf64, #CSC>
+    bufferization.dealloc_tensor %x2 : tensor<2x4xf64, #CSC>
+    bufferization.dealloc_tensor %x3 : tensor<8x4xf64, #CSC>
+
+    return
+  }
+}
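A note on the dimension translation in the SparseTensorConversion.cpp hunk above. The sketch below is a minimal, standalone C++ illustration of why the patched code ends up sizing the workspace by the storage-innermost dimension: for a rank-2 CSC tensor with dimOrdering affine_map<(i,j) -> (j,i)>, the innermost stored position translates back to original dimension 0, which for the 8x4 output in @matmul2 has size 8 rather than 4. The Permutation struct here is a hypothetical stand-in for AffineMap::getDimPosition, not MLIR code.

#include <cassert>
#include <vector>

// Hypothetical stand-in for an MLIR permutation map: results[k] holds the
// original dimension that appears as the k-th storage dimension, mirroring
// what AffineMap::getDimPosition(k) returns for a permutation map.
struct Permutation {
  std::vector<unsigned> results;
  unsigned getDimPosition(unsigned k) const { return results[k]; }
};

int main() {
  // CSC ordering (i,j) -> (j,i): storage dim 0 is j (d1), storage dim 1 is i (d0).
  Permutation csc{{1, 0}};

  // Expansion always works on the innermost *stored* dimension; translate that
  // position back to the original dimension, as the patch does before calling
  // genDimSizeCall.
  unsigned rank = 2;
  unsigned innerDim = rank - 1;            // innermost storage position
  innerDim = csc.getDimPosition(innerDim); // original dimension i (= 0)
  assert(innerDim == 0);

  // For the 8x4 CSC output in @matmul2, dimension 0 has size 8, so the
  // values/filled/indices workspace buffers are allocated with size 8.
  return 0;
}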