diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -815,6 +815,12 @@
     ```
   }];
 
+  let builders = [
+    OpBuilder<(
+      ins "Value":$tensor,
+      "function_ref<void(OpBuilder &, Location, ValueRange)>")>
+  ];
+
   let regions = (region AnyRegion:$region);
   let assemblyFormat = "`in` $tensor attr-dict `:` type($tensor) `do` $region";
   let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -597,6 +597,32 @@
   return success();
 }
 
+void ForeachOp::build(
+    OpBuilder &builder, OperationState &result, Value tensor,
+    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
+  build(builder, result, tensor);
+  if (!bodyBuilder)
+    return;
+
+  auto rtp = tensor.getType().cast<RankedTensorType>();
+  int64_t rank = rtp.getRank();
+
+  SmallVector<Type> blockArgTypes;
+  // Starts with `rank` index arguments...
+  std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType());
+  // ...followed by one value argument.
+  blockArgTypes.push_back(rtp.getElementType());
+
+  SmallVector<Location> blockArgLocs;
+  std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc());
+
+  OpBuilder::InsertionGuard guard(builder);
+  auto &region = *result.regions.front();
+  Block *bodyBlock =
+      builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs);
+  bodyBuilder(builder, result.location, bodyBlock->getArguments());
+}
+
 LogicalResult ForeachOp::verify() {
   auto t = getTensor().getType().cast<RankedTensorType>();
   auto args = getBody()->getArguments();
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -111,6 +111,32 @@
   return isZeroValue(yieldOp.getOperand(0));
 }
 
+// Helper to set up a COO type.
+// TODO: The dim level property of the COO type relies on the input tensors;
+// the shape relies on the output tensor.
+static RankedTensorType getUnorderedCOOFromType(RankedTensorType src) {
+  auto *ctx = src.getContext();
+  auto rank = src.getRank();
+  SmallVector<SparseTensorEncodingAttr::DimLevelType> dims;
+
+  // An unordered and non-unique compressed dim at the beginning.
+  dims.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNuNo);
+  // TODO: it is actually ordered at this level for ordered inputs.
+  // Followed by n-2 unordered, non-unique singleton levels.
+  std::fill_n(std::back_inserter(dims), rank - 2,
+              SparseTensorEncodingAttr::DimLevelType::SingletonNuNo);
+  // Ends with an unordered, unique singleton level.
+  // TODO: the COO should only have a unique level at the end if all the
+  // inputs (for concatenate) are unique at their last level.
+  dims.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNo);
+  // TODO: Maybe pick the bitwidth based on the input/output tensors (probably
+  // the largest one among them) in the original operation instead of using
+  // the default value.
+  auto enc = SparseTensorEncodingAttr::get(
+      ctx, dims, AffineMap::getMultiDimIdentityMap(rank, ctx), 0, 0);
+  return RankedTensorType::get(src.getShape(), src.getElementType(), enc);
+}
+
 //===---------------------------------------------------------------------===//
 // The actual sparse tensor rewriting rules.
 //===---------------------------------------------------------------------===//
@@ -296,6 +322,61 @@
   }
 };
 
+struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(ConcatenateOp op,
+                                PatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+    auto rtp = op.getType().cast<RankedTensorType>();
+    // TODO: Build the output shape if needed.
+    assert(rtp.hasStaticShape());
+    auto rank = rtp.getRank();
+    size_t conDim = op.getDimension().getZExtValue();
+    // %t = concatenate %s1, %s2, %s3 {dim = 1}
+    // ==>
+    // %tmp = bufferization.alloc_tensor : unordered COO
+    // foreach in %s1 : insert d0, d1, %tmp
+    // foreach in %s2 : insert d0, d1 + size(s1), %tmp
+    // foreach in %s3 : insert d0, d1 + size(s1) + size(s2), %tmp
+    // %t = sparse_tensor.convert %tmp
+    auto cooTp = getUnorderedCOOFromType(rtp);
+    auto cooBuffer =
+        rewriter.create<AllocTensorOp>(loc, cooTp, ValueRange()).getResult();
+
+    Value offset = constantIndex(rewriter, loc, 0);
+    for (Value input : op.getInputs()) {
+      // Builds the indexing map.
+
+      // Builds a foreach op for each input tensor to append its values to the
+      // output tensor.
+      rewriter.create<ForeachOp>(
+          loc, input, [&](OpBuilder &builder, Location loc, ValueRange args) {
+            SmallVector<Value> indices;
+            for (int64_t i = 0; i < rank; i++) {
+              uint64_t dim =
+                  toStoredDim(getSparseTensorEncoding(input.getType()), i);
+              Value idx = args[dim];
+              if (i == static_cast<int64_t>(conDim))
+                // Transform coordinates on the concatenated dimension.
+                idx = builder.create<arith::AddIOp>(loc, idx, offset);
+              indices.push_back(idx);
+            }
+            builder.create<InsertOp>(loc, args.back(), cooBuffer, indices);
+            builder.create<sparse_tensor::YieldOp>(loc);
+          });
+      // Accumulates the offset. Note that only static-shaped inputs are
+      // allowed by the concatenate op verifier, which saves us from computing
+      // the offset dynamically.
+      auto d = input.getType().cast<RankedTensorType>().getShape()[conDim];
+      assert(!ShapedType::isDynamic(d));
+      offset = rewriter.create<arith::AddIOp>(loc, offset,
+                                              constantIndex(rewriter, loc, d));
+    }
+    rewriter.replaceOpWithNewOp<ConvertOp>(op, rtp, cooBuffer);
+    return success();
+  }
+};
+
 /// Sparse rewriting rule for the foreach operator.
 struct ForeachRewriter : public OpRewritePattern<ForeachOp> {
 public:
@@ -363,4 +444,6 @@
                ReshapeRewriter<tensor::CollapseShapeOp>, ForeachRewriter>(
       patterns.getContext());
   // TODO: If RT not enabled, rewrite concatenate ops, etc. here.
+  if (!enableRT)
+    patterns.add<ConcatenateRewriter>(patterns.getContext());
 }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir
@@ -0,0 +1,81 @@
+// RUN: mlir-opt %s --sparsification=enable-runtime-library=false | FileCheck %s
+
+#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
+
+// CHECK-LABEL: @concat_sparse_sparse(
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor
+// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor
+// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_c0:.*]] = arith.constant 0 : index
+// CHECK: %[[TMP_c1:.*]] = arith.constant 1 : index
+// CHECK: %[[TMP_c5:.*]] = arith.constant 5 : index
+// CHECK: %[[TMP_c2:.*]] = arith.constant 2 : index
+// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor() : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[TMP_1:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 0 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_2:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 0 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_3:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_4:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index} : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor
+// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref<?xindex>
+// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref<?xindex>
+// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
+// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref<?xindex>
+// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref<?xindex>
+// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
+// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
+// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_10:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 1 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_11:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 1 : index} : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor
+// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<?xindex>
+// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref<?xindex>
+// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
+// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref<?xindex>
+// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref<?xindex>
+// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
+// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
+// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
+// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_17:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 1 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_18:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 1 : index} : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor
+// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref<?xindex>
+// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref<?xindex>
+// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
+// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref<?xindex>
+// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref<?xindex>
+// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
+// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
+// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
+// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor
+func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
+                                %arg1: tensor<3x4xf64, #DCSR>,
+                                %arg2: tensor<4x4xf64, #DCSR>)
+    -> tensor<9x4xf64, #DCSR> {
+  %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+       : tensor<2x4xf64, #DCSR>,
+         tensor<3x4xf64, #DCSR>,
+         tensor<4x4xf64, #DCSR> to tensor<9x4xf64, #DCSR>
+  return %0 : tensor<9x4xf64, #DCSR>
+}
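Usage note (not part of the patch): the new ForeachOp builder overload takes a callback that receives the block arguments set up by ForeachOp::build, namely one index per dimension followed by the element value, which is exactly how ConcatenateRewriter drives it above. The snippet below is a minimal stand-alone sketch of that calling pattern; the helper name copySparseToDense and the pre-allocated dense memref operand are assumptions made purely for illustration.

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"

using namespace mlir;

// Hypothetical helper (illustration only): visit every stored element of a
// sparse tensor and write it into a pre-allocated dense memref whose rank
// matches the tensor's rank.
static void copySparseToDense(OpBuilder &builder, Location loc, Value sparse,
                              Value denseBuffer) {
  builder.create<sparse_tensor::ForeachOp>(
      loc, sparse, [&](OpBuilder &b, Location l, ValueRange args) {
        // args = (index_0, ..., index_{rank-1}, value), as populated by the
        // new ForeachOp::build overload.
        ValueRange coords = args.drop_back();
        Value val = args.back();
        b.create<memref::StoreOp>(l, val, denseBuffer, coords);
        b.create<sparse_tensor::YieldOp>(l);
      });
}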