diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -103,7 +103,7 @@
 
 /// Returns true iff the given type is a COO type where the last level
 /// is unique.
-bool isUniqueCOOType(TensorType tp);
+bool isUniqueCOOType(Type tp);
 
 /// Returns the starting level for a trailing COO region that spans
 /// at least two levels. If no such COO region is found, then returns
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -453,7 +453,7 @@
   return !isUnique || enc.isUniqueLvl(lvlRank - 1);
 }
 
-bool mlir::sparse_tensor::isUniqueCOOType(TensorType tp) {
+bool mlir::sparse_tensor::isUniqueCOOType(Type tp) {
   return isCOOType(getSparseTensorEncoding(tp), 0, /*isUnique=*/true);
 }
 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
@@ -256,6 +256,7 @@
   std::vector<std::vector<Value>> pidxs;
   std::vector<std::vector<Value>> coord;
   std::vector<std::vector<Value>> highs;
+  std::vector<std::vector<Value>> lvlSizes;
   std::vector<std::vector<Value>> ptrBuffer; // to_pointers
   std::vector<std::vector<Value>> idxBuffer; // to_indices
   std::vector<Value> valBuffer;              // to_value
@@ -276,6 +277,28 @@
   /// general.
   std::vector<unsigned> sparsiferLoopLvlMap;
 
+  //
+  // View-based reshape related fields and methods.
+  //
+
+  /// Collapse reassociations related to a specific tensor.
+  // TODO: support expand.
+  std::vector<ArrayAttr> collapseReassoc;
+
+  /// Gets the collapse reassociation for tensors[tid] on l. For unreshaped
+  /// operands, the reassociation is simply an identity transformation.
+  SmallVector<int64_t, 2> getCollapseReassociation(unsigned tid, unsigned l) {
+    // Returns a SmallVector<int64_t, 2>, just like `ReassociationIndices`.
+    if (auto reass = collapseReassoc[tid]) {
+      auto attr = reass[l];
+      return llvm::to_vector<2>(
+          llvm::map_range(attr.cast<ArrayAttr>(), [&](Attribute indexAttr) {
+            return indexAttr.cast<IntegerAttr>().getInt();
+          }));
+    }
+    return {l};
+  }
+
   /// TODO: not yet used, it should track the current level for each tensor
   /// to help eliminate `dim` parameters from above APIs.
   /// std::vector<unsigned> curLv;
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -132,31 +132,43 @@
   initialize(tensors, loopTag, hasOutput, isSparseOut, topSort);
 }
 
-void LoopEmitter::initialize(ValueRange tensors, StringAttr loopTag,
-                             bool hasOutput, bool isSparseOut,
-                             ArrayRef<unsigned> topSort) {
+void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
+                             bool isSparseOut, ArrayRef<unsigned> topSort) {
   // First initializes fields.
   this->loopTag = loopTag;
   this->hasOutput = hasOutput;
   this->isSparseOut = isSparseOut;
-  this->tensors.assign(tensors.begin(), tensors.end());
+  this->tensors.assign(ts.begin(), ts.end());
   this->isSparseSlices.assign(tensors.size(), false);
   this->dimTypes.assign(tensors.size(), std::vector<DimLevelType>());
   this->pidxs.assign(tensors.size(), std::vector<Value>());
   this->coord.assign(tensors.size(), std::vector<Value>());
   this->highs.assign(tensors.size(), std::vector<Value>());
+  this->lvlSizes.assign(tensors.size(), std::vector<Value>());
   this->ptrBuffer.assign(tensors.size(), std::vector<Value>());
   this->idxBuffer.assign(tensors.size(), std::vector<Value>());
   this->valBuffer.assign(tensors.size(), nullptr);
   this->loopStack.reserve(topSort.size());
   this->sparsiferLoopLvlMap.assign(topSort.size(), 0);
+  this->collapseReassoc.assign(tensors.size(), nullptr);
 
   for (size_t tid = 0, e = tensors.size(); tid < e; tid++) {
     auto t = tensors[tid];
     // a scalar or 0-dimension tensors
     if (isZeroRankedTensorOrScalar(t.getType()))
       continue;
+    auto rtp = getRankedTensorType(t);
+    if (auto reshape = t.getDefiningOp<tensor::CollapseShapeOp>();
+        isUniqueCOOType(rtp) && reshape) {
+      // TODO: Support more kinds of sparse tensors.
+      // FIXME: We should instead lower reshape operations on sparse tensors to
+      // view change.
+      collapseReassoc[tid] = reshape.getReassociation();
+      rtp = reshape.getSrcType();
+      // Overwrites the tensor with the source tensor of the reshape operation.
+      tensors[tid] = t = reshape.getSrc();
+    }
     auto rank = static_cast<size_t>(rtp.getRank());
     auto enc = getSparseTensorEncoding(rtp);
     // We always treat sparse output tensor as dense so that we always iterate
@@ -172,6 +184,7 @@
     pidxs[tid].assign(rank, Value());
     coord[tid].assign(rank, Value());
     highs[tid].assign(rank, Value());
+    lvlSizes[tid].assign(rank, Value());
     ptrBuffer[tid].assign(rank, Value());
     idxBuffer[tid].assign(rank, Value());
   }
@@ -224,7 +237,8 @@
       // Find upper bound in current dimension.
       // FIXME: `toOrigDim` is deprecated
       const Dimension d = toOrigDim(enc, l);
-      highs[t][l] = mlir::linalg::createOrFoldDimOp(builder, loc, tensor, d);
+      lvlSizes[t][l] = highs[t][l] =
+          mlir::linalg::createOrFoldDimOp(builder, loc, tensor, d);
     }
 
   // Perform the required bufferization. Dense inputs materialize
@@ -325,6 +339,8 @@
   }
 
   auto enc = getSparseTensorEncoding(tensors[tid].getType());
+  auto reass = getCollapseReassociation(tid, dim);
+  dim = reass.front();
   // TODO: support dynamic slices.
   Value step = constantIndex(builder, loc, 1);
   Value lo = isSparseInput ? pidxs[tid][dim]      // current offset
@@ -334,6 +350,7 @@
   Operation *loop = nullptr;
   Value iv;
   if (isParallel) {
+    assert(collapseReassoc[tid] == nullptr);
     scf::ParallelOp parOp =
         builder.create<scf::ParallelOp>(loc, lo, hi, step, reduc);
     builder.setInsertionPointToStart(parOp.getBody());
@@ -365,10 +382,21 @@
 
   Value c;
   if (isSparseInput) {
-    pidxs[tid][dim] = iv;
-    // Generating a load on the indices array yields the coordinate.
-    Value ptr = idxBuffer[tid][dim];
-    c = genIndexLoad(builder, loc, ptr, iv);
+    assert(reass.size() == 1 || isUniqueCOOType(tensors[tid].getType()));
+    c = constantIndex(builder, loc, 0);
+    for (unsigned i = 0; i < reass.size(); i++) {
+      auto lvl = reass[i];
+      // For COO, the pidxs are always the same across consecutive levels.
+      pidxs[tid][lvl] = iv;
+      // Generating a load on the indices array yields the coordinate.
+      Value ptr = idxBuffer[tid][lvl];
+      Value off = genIndexLoad(builder, loc, ptr, iv);
+      c = builder.create<arith::AddIOp>(loc, c, off);
+      if (i != reass.size() - 1) {
+        c = builder.create<arith::MulIOp>(loc, c,
+                                          this->lvlSizes[tid][reass[i + 1]]);
+      }
+    }
   } else {
     // Dense tensor, the coordinates is the inducation variable.
     c = iv;
@@ -643,27 +671,30 @@
 
   if (isDenseDLT(dimType))
     return;
 
-  // Either the first dimension, or the previous dimension has been set.
-  assert(dim == 0 || pidxs[tid][dim - 1]);
-  Value c0 = constantIndex(builder, loc, 0);
-  Value c1 = constantIndex(builder, loc, 1);
-  if (isCompressedDLT(dimType)) {
-    Value ptr = ptrBuffer[tid][dim];
-
-    Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
-    pidxs[tid][dim] = genIndexLoad(builder, loc, ptr, pLo);
-
-    Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
-    highs[tid][dim] = genIndexLoad(builder, loc, ptr, pHi);
-    return;
-  }
-  if (isSingletonDLT(dimType)) {
-    Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
-    Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
+  auto reassoc = getCollapseReassociation(tid, dim);
+  for (auto lvl : reassoc) {
+    // Either the first dimension, or the previous dimension has been set.
+    assert(lvl == 0 || pidxs[tid][lvl - 1]);
+    Value c0 = constantIndex(builder, loc, 0);
+    Value c1 = constantIndex(builder, loc, 1);
+    if (isCompressedDLT(dimType)) {
+      Value ptr = ptrBuffer[tid][lvl];
+
+      Value pLo = lvl == 0 ? c0 : pidxs[tid][lvl - 1];
+      pidxs[tid][lvl] = genIndexLoad(builder, loc, ptr, pLo);
+
+      Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
+      highs[tid][lvl] = genIndexLoad(builder, loc, ptr, pHi);
+      return;
+    }
+    if (isSingletonDLT(dimType)) {
+      Value pLo = lvl == 0 ? c0 : pidxs[tid][lvl - 1];
+      Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
 
-    pidxs[tid][dim] = pLo;
-    highs[tid][dim] = pHi;
-    return;
+      pidxs[tid][lvl] = pLo;
+      highs[tid][lvl] = pHi;
+      return;
+    }
   }
   llvm_unreachable("Unrecognizable dimesion type!");
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir
@@ -0,0 +1,59 @@
+// RUN: mlir-opt %s --linalg-generalize-named-ops --sparsification --cse --canonicalize | FileCheck %s
+
+#COO_2D = #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ], pointerBitWidth = 32, indexBitWidth = 32 }>
+#COO_3D = #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton-nu", "singleton" ], pointerBitWidth = 32, indexBitWidth = 32 }>
+
+// CHECK-LABEL: func.func @sparse_reshape_fused(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<5x6xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<6x2x3xf32,
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 5 : index
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_6:.*]] = tensor.empty() : tensor<5x6xf32>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index}
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index}
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index}
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 2 : index}
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]]
+// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_6]] : memref<5x6xf32>
+// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
+// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xi32>
+// CHECK: %[[VAL_15:.*]] = arith.extui %[[VAL_14]] : i32 to i64
+// CHECK: %[[VAL_16:.*]] = arith.index_cast %[[VAL_15]] : i64 to index
+// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xi32>
+// CHECK: %[[VAL_18:.*]] = arith.extui %[[VAL_17]] : i32 to i64
+// CHECK: %[[VAL_19:.*]] = arith.index_cast %[[VAL_18]] : i64 to index
+// CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_16]] to %[[VAL_19]] step %[[VAL_5]] {
+// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref<?xi32, strided<[?], offset: ?>>
+// CHECK: %[[VAL_22:.*]] = arith.extui %[[VAL_21]] : i32 to i64
+// CHECK: %[[VAL_23:.*]] = arith.index_cast %[[VAL_22]] : i64 to index
+// CHECK: %[[VAL_24:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_13]], %[[VAL_23]]] : tensor<5x6xf32>
+// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xi32, strided<[?], offset: ?>>
+// CHECK: %[[VAL_26:.*]] = arith.extui %[[VAL_25]] : i32 to i64
+// CHECK: %[[VAL_27:.*]] = arith.index_cast %[[VAL_26]] : i64 to index
+// CHECK: %[[VAL_28:.*]] = arith.muli %[[VAL_27]], %[[VAL_3]] : index
+// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<?xi32, strided<[?], offset: ?>>
+// CHECK: %[[VAL_30:.*]] = arith.extui %[[VAL_29]] : i32 to i64
+// CHECK: %[[VAL_31:.*]] = arith.index_cast %[[VAL_30]] : i64 to index
+// CHECK: %[[VAL_32:.*]] = arith.addi %[[VAL_28]], %[[VAL_31]] : index
+// CHECK: %[[VAL_33:.*]] = tensor.extract %[[VAL_6]]{{\[}}%[[VAL_13]], %[[VAL_32]]] : tensor<5x6xf32>
+// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_20]]] : memref<?xf32>
+// CHECK: %[[VAL_35:.*]] = arith.mulf %[[VAL_24]], %[[VAL_34]] : f32
+// CHECK: %[[VAL_36:.*]] = arith.addf %[[VAL_33]], %[[VAL_35]] : f32
+// CHECK: memref.store %[[VAL_36]], %[[VAL_12]]{{\[}}%[[VAL_13]], %[[VAL_32]]] : memref<5x6xf32>
+// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: %[[VAL_37:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<5x6xf32>
+// CHECK: %[[VAL_38:.*]] = tensor.expand_shape %[[VAL_37]] {{\[\[}}0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
+// CHECK: %[[VAL_39:.*]] = tensor.cast %[[VAL_38]] : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+// CHECK: return %[[VAL_39]] : tensor<?x?x?xf32>
+// CHECK: }
+func.func @sparse_reshape_fused(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor<?x?x?xf32> {
+  %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
+  %0 = tensor.empty() : tensor<5x6xf32>
+  %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+  %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
+  %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+  return %ret1 : tensor<?x?x?xf32>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
@@ -0,0 +1,108 @@
+// DEFINE: %{option} = enable-runtime-library=false
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// DEFINE: mlir-cpu-runner \
+// DEFINE:  -e entry -entry-point-result=void \
+// DEFINE:  -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils | \
+// DEFINE: FileCheck %s
+//
+// RUN: %{compile} | %{run}
+//
+// Do the same run, but now with the runtime library.
+// REDEFINE: %{option} = "enable-runtime-library=true"
+// RUN: %{compile} | %{run}
+
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
+// RUN: %{compile} | %{run}
+
+#COO_2D = #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ], pointerBitWidth = 32, indexBitWidth = 32 }>
+#COO_3D = #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton-nu", "singleton" ], pointerBitWidth = 32, indexBitWidth = 32 }>
+
+module {
+  func.func private @printMemref3dF32(%ptr : tensor<?x?x?xf32>) attributes { llvm.emit_c_interface }
+  func.func private @printMemref2dF32(%ptr : tensor<?x?xf32>) attributes { llvm.emit_c_interface }
+
+  func.func @test_sparse(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor<?x?x?xf32> {
+    %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
+    %0 = tensor.empty() : tensor<5x6xf32>
+    %cst = arith.constant 0.000000e+00 : f32
+    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+    %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+    %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
+    %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+    return %ret1 : tensor<?x?x?xf32>
+  }
+
+  func.func @test_dense(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32>) -> tensor<?x?x?xf32> {
+    %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32> into tensor<6x6xf32>
+    %0 = tensor.empty() : tensor<5x6xf32>
+    %cst = arith.constant 0.000000e+00 : f32
+    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+    %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+    %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
+    %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+    return %ret1 : tensor<?x?x?xf32>
+  }
+
+
+  func.func @entry() {
+    // Set up the input tensors.
+    %d1 = arith.constant sparse<
+       [ [0, 0], [1, 1], [2, 2], [2, 3], [4, 5] ],
+       [1.0, 2.0, 3.0, 4.0, 5.0]
+    > : tensor<5x6xf32>
+
+    %d2 = arith.constant sparse<
+       [ [0, 0, 0], [1, 1, 1], [2, 1, 1] ],
+       [ 6.0, 7.0, 8.0]
+    > : tensor<6x2x3xf32>
+
+    // CHECK: Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [5, 2, 3] strides = [6, 3, 1] data =
+    // CHECK-NEXT:[
+    // CHECK-SAME: [
+    // CHECK-SAME: [6, 0, 0],
+    // CHECK-NEXT: [0, 0, 0]],
+    // CHECK-NEXT: [
+    // CHECK-SAME: [0, 0, 0],
+    // CHECK-NEXT: [0, 14, 0]],
+    // CHECK-NEXT: [
+    // CHECK-SAME: [0, 0, 0],
+    // CHECK-NEXT: [0, 24, 0]],
+    // CHECK-NEXT: [
+    // CHECK-SAME: [0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0]],
+    // CHECK-NEXT: [
+    // CHECK-SAME: [0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0]]]
+    %do1 = call @test_dense(%d1, %d2) : (tensor<5x6xf32>, tensor<6x2x3xf32>) -> tensor<?x?x?xf32>
+    call @printMemref3dF32(%do1) : (tensor<?x?x?xf32>) -> ()
+
+    // Same results.
+    // _CHECK-NEXT: Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [5, 2, 3] strides = [6, 3, 1] data =
+    // _CHECK-NEXT:[
+    // _CHECK-SAME: [
+    // _CHECK-SAME: [6, 0, 0],
+    // _CHECK-NEXT: [0, 0, 0]],
+    // _CHECK-NEXT: [
+    // _CHECK-SAME: [0, 0, 0],
+    // _CHECK-NEXT: [0, 14, 0]],
+    // _CHECK-NEXT: [
+    // _CHECK-SAME: [0, 0, 0],
+    // _CHECK-NEXT: [0, 24, 0]],
+    // _CHECK-NEXT: [
+    // _CHECK-SAME: [0, 0, 0],
+    // _CHECK-NEXT: [0, 0, 0]],
+    // _CHECK-NEXT: [
+    // _CHECK-SAME: [0, 0, 0],
+    // _CHECK-NEXT: [0, 0, 0]]]
+    %s2 = sparse_tensor.convert %d2 : tensor<6x2x3xf32> to tensor<6x2x3xf32, #COO_3D>
+    %so1 = call @test_sparse(%d1, %s2) : (tensor<5x6xf32>, tensor<6x2x3xf32, #COO_3D>) -> tensor<?x?x?xf32>
+    call @printMemref3dF32(%so1) : (tensor<?x?x?xf32>) -> ()
+
+    bufferization.dealloc_tensor %s2 : tensor<6x2x3xf32, #COO_3D>
+    bufferization.dealloc_tensor %do1 : tensor<?x?x?xf32>
+    bufferization.dealloc_tensor %so1 : tensor<?x?x?xf32>
+    return
+  }
+}
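
For reference, the coordinate computation that the loop emitter now produces for a collapsed COO level group is a plain row-major linearization of the per-level coordinates. A minimal C++ sketch of that arithmetic, assuming hypothetical inputs: `coords` stands for the values loaded from the index buffers at the shared position `iv`, and `lvlSizes` for the level sizes recorded in `lvlSizes[tid]`; the helper name `linearizeCollapsedCoords` is illustrative only and not part of the patch.

#include <cassert>
#include <cstdint>
#include <vector>

// Mirrors the emitted arith.addi/arith.muli sequence: starting from 0, add the
// coordinate of the current level, then scale by the size of the *next* level
// in the reassociation group (no scaling after the last level).
static int64_t linearizeCollapsedCoords(const std::vector<int64_t> &coords,
                                        const std::vector<int64_t> &lvlSizes) {
  assert(coords.size() == lvlSizes.size() && !coords.empty());
  int64_t c = 0;
  for (size_t i = 0, e = coords.size(); i < e; ++i) {
    c += coords[i];           // c = arith.addi(c, off)
    if (i + 1 != e)
      c *= lvlSizes[i + 1];   // c = arith.muli(c, lvlSizes[next])
  }
  // Note: lvlSizes[0] is never used, matching the emitter, where only the
  // sizes of later levels scale the running sum.
  return c;
}

// Example: collapsing two levels of sizes {2, 3}, the COO coordinate (1, 2)
// maps to 1 * 3 + 2 == 5, which is the same computation the reshape_dot test
// checks above: %[[VAL_28]] scales the level-1 index by 3 before adding the
// level-2 index.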