diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -117,6 +117,26 @@ .getResult(0); } +/// Compute the size from type (for static sizes) or from an already-converted +/// opaque pointer source (for dynamic sizes) at the given dimension. +static Value sizeFromPtrAtDim(OpBuilder &builder, Operation *op, + SparseTensorEncodingAttr &enc, ShapedType stp, + Value src, unsigned dim) { + auto shape = stp.getShape(); + if (shape[dim] == ShapedType::kDynamicSize) + return genDimSizeCall(builder, op, enc, src, dim); + return constantIndex(builder, op->getLoc(), shape[dim]); +} + +/// Populates given sizes array from type (for static sizes) and from +/// an already-converted opaque pointer source (for dynamic sizes). +static void sizesFromPtr(OpBuilder &builder, SmallVector &sizes, + Operation *op, SparseTensorEncodingAttr &enc, + ShapedType stp, Value src) { + for (unsigned i = 0, rank = stp.getRank(); i < rank; i++) + sizes.push_back(sizeFromPtrAtDim(builder, op, enc, stp, src, i)); +} + /// Populates given sizes array from type. static void sizesFromType(OpBuilder &builder, SmallVector &sizes, Location loc, ShapedType stp) { @@ -135,18 +155,42 @@ sizes.push_back(linalg::createOrFoldDimOp(builder, loc, src, i)); } -/// Populates given sizes array from type (for static sizes) and from -/// an already converted into opague pointer source (for dynamic sizes). -static void sizesFromPtr(OpBuilder &builder, SmallVector &sizes, - Operation *op, SparseTensorEncodingAttr &enc, - ShapedType stp, Value src) { +/// Populates the given sizes array for concatenation from type (for static +/// sizes) and from an already-converted opaque pointer source (for dynamic +/// sizes). +static void concatSizesFromInputs(OpBuilder &builder, + SmallVector &sizes, Operation *op, + ShapedType dstTp, ValueRange srcs, + unsigned dim) { Location loc = op->getLoc(); - auto shape = stp.getShape(); - for (unsigned i = 0, rank = stp.getRank(); i < rank; i++) - if (shape[i] == ShapedType::kDynamicSize) - sizes.push_back(genDimSizeCall(builder, op, enc, src, i)); - else - sizes.push_back(constantIndex(builder, loc, shape[i])); + auto dstShape = dstTp.getShape(); + + auto srcTp = srcs[0].getType().cast(); + auto srcEnc = getSparseTensorEncoding(srcTp); + // We first fills the sizes from an input tensor, and then + // compute the size of the concatenation dimension if necessary. + if (srcEnc) + // Reuses sizes from an arbitrary input tensor is fine. + sizesFromPtr(builder, sizes, op, srcEnc, srcTp, srcs[0]); + else + sizesFromSrc(builder, sizes, loc, srcs[0]); + + // Sum up on the `dim` if the dimension is dynamic. + if (dstShape[dim] != ShapedType::kDynamicSize) { + // Faithfully take the static size. + sizes[dim] = constantIndex(builder, loc, dstShape[dim]); + } else { + // Else, compute the shape dynamically. + for (size_t i = 1, sz = srcs.size(); i < sz; i++) { + auto srcTp = srcs[i].getType().cast(); + auto encSrc = getSparseTensorEncoding(srcTp); + Value srcSz = + encSrc ? sizeFromPtrAtDim(builder, op, encSrc, srcTp, srcs[i], dim) + : linalg::createOrFoldDimOp(builder, loc, srcs[i], dim); + // Sum up all the sizes. 
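+      // For example (illustration only): when the inputs' sizes along `dim`
+      // are 2, 3 and 4, sizes[dim] starts at 2 (taken from srcs[0] above)
+      // and folds in 3 and 4 here, so the dynamic result size becomes 9.
+      // Each iteration emits either the runtime dimension-size query for a
+      // sparse input (via sizeFromPtrAtDim / genDimSizeCall) or a tensor.dim
+      // for a dense one, followed by the arith.addi on the running total.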
+ sizes[dim] = builder.create(loc, sizes[dim], srcSz); + } + } } /// Generates an uninitialized temporary buffer of the given size and @@ -234,6 +278,20 @@ params.push_back(ptr); } +/// Generates the code to read the value from tensor[ivs].The generated code +/// looks like the following and the insertion point after this routine is +/// inside the if-then branch behind the assignment to ind. +/// if (tensor[ivs] != 0) +/// insert_point +static Value genValueForDense(OpBuilder &builder, Location loc, Value tensor, + ValueRange ivs) { + Value val = builder.create(loc, tensor, ivs); + Value cond = genIsNonzero(builder, loc, val); + scf::IfOp ifOp = builder.create(loc, cond, /*else*/ false); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + return val; +} + /// Generates the code to read the value from tensor[ivs], and conditionally /// stores the indices ivs to the memory in ind. The generated code looks like /// the following and the insertion point after this routine is inside the @@ -243,10 +301,7 @@ /// ind = ivs static Value genIndexAndValueForDense(OpBuilder &builder, Location loc, Value tensor, Value ind, ValueRange ivs) { - Value val = builder.create(loc, tensor, ivs); - Value cond = genIsNonzero(builder, loc, val); - scf::IfOp ifOp = builder.create(loc, cond, /*else*/ false); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + Value val = genValueForDense(builder, loc, tensor, ivs); unsigned i = 0; for (auto iv : ivs) { Value idx = constantIndex(builder, loc, i++); @@ -346,18 +401,43 @@ builder.create(loc, buffer); } -/// Inserts the element returned by genGetNextCall(_, ind, elemPtr) into -/// the tensor created by allocDenseTensor(). The `rank` is the rank -/// of the `tensor` and the length of `ind`. -static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc, - Value elemPtr, Value tensor, - unsigned rank, Value ind) { +/// Converts a pointer to COO (from calls to iter->next()) into a vector of +/// indices, apply (optional) `offset` on `offsetDim`. +static SmallVector loadIndices(OpBuilder &builder, Location loc, + unsigned rank, Value ind, + unsigned offsetDim = 0, + Value offset = Value()) { SmallVector ivs; ivs.reserve(rank); for (unsigned i = 0; i < rank; i++) { Value idx = constantIndex(builder, loc, i); - ivs.push_back(builder.create(loc, ind, idx)); + idx = builder.create(loc, ind, idx); + if (offsetDim == i && offset) + idx = builder.create(loc, idx, offset); + ivs.push_back(idx); + } + return ivs; +} + +/// Converts the vector indices and store it into the memory pointed by +/// `ind`, apply (optional) `offset` on `offsetDim`. +static void storeIndices(OpBuilder &builder, Location loc, unsigned rank, + Value ind, ValueRange ivs, unsigned offsetDim = 0, + Value offset = Value()) { + for (unsigned i = 0; i < rank; i++) { + Value idx = ivs[i]; + if (offsetDim == i && offset) + idx = builder.create(loc, idx, offset); + builder.create(loc, idx, ind, + constantIndex(builder, loc, i)); } +} + +/// Inserts a value stored in `elemPtr` into a dense tensor created by +/// allocDenseTensor(). +static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc, + Value elemPtr, Value tensor, + ValueRange ivs) { Value elemV = builder.create(loc, elemPtr); builder.create(loc, elemV, tensor, ivs); } @@ -510,6 +590,100 @@ return success(); } +// Generates a while loop that iterates over the COO list extracted +// from `t`, using `bodyBuilder` to build the loop body. 
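+// A typical `bodyBuilder` callback (an illustrative sketch that mirrors the
+// sparse-input case of the concatenate conversion further below) loads the
+// current indices and forwards the element:
+//   [&](OpBuilder &b, Location loc, Value idx, Value elemPtr) {
+//     auto ivs = loadIndices(b, loc, rank, idx);
+//     insertScalarIntoDenseTensor(b, loc, elemPtr, dst, ivs);
+//   }
+// The emitted loop has the shape: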
+// while (elem = coo->getNext()) {
+//   bodyBuilder
+// }
+// TODO: Get rid of Operation *op in the parameters list! It seems
+// that we only use it for op->getLoc(); pass the loc directly instead!
+// TODO: This can be reused by other operator conversions (ReshapeOp,
+// ConvertOp) to reduce code repetition!
+static void genSparseCOOIterationLoop(
+    ConversionPatternRewriter &rewriter, Operation *op, Value t,
+    RankedTensorType tensorTp,
+    function_ref<void(OpBuilder &, Location, Value, Value)> bodyBuilder) {
+  Location loc = op->getLoc();
+  auto enc = getSparseTensorEncoding(tensorTp);
+  assert(enc && "Generating Sparse Tensor COO Loop on a Dense Tensor!");
+
+  unsigned rank = tensorTp.getRank();
+  Type elemTp = tensorTp.getElementType();
+
+  // Start an iterator over the tensor (in original index order).
+  auto noPerm = SparseTensorEncodingAttr::get(
+      rewriter.getContext(), enc.getDimLevelType(), AffineMap(),
+      enc.getPointerBitWidth(), enc.getIndexBitWidth());
+  SmallVector<Value> sizes;
+  SmallVector<Value> params;
+  sizesFromPtr(rewriter, sizes, op, noPerm, tensorTp, t);
+  newParams(rewriter, params, op, tensorTp, noPerm, Action::kToIterator, sizes,
+            t);
+  Value iter = genNewCall(rewriter, op, params);
+
+  // Construct a while loop over the iterator.
+  Value srcIdx = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
+  Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+  SmallVector<Value> noArgs;
+  SmallVector<Type> noTypes;
+  auto whileOp = rewriter.create<scf::WhileOp>(loc, noTypes, noArgs);
+  Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes);
+  rewriter.setInsertionPointToEnd(before);
+  Value cond = genGetNextCall(rewriter, op, iter, srcIdx, elemPtr);
+  rewriter.create<scf::ConditionOp>(loc, cond, before->getArguments());
+  Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes);
+  rewriter.setInsertionPointToStart(after);
+  // Callback here to build loop body.
+  bodyBuilder(rewriter, loc, srcIdx, elemPtr);
+  rewriter.create<scf::YieldOp>(loc);
+  // Finish generating loop.
+  rewriter.setInsertionPointAfter(whileOp);
+
+  // Free memory for iterator.
+  genDelCOOCall(rewriter, op, elemTp, iter);
+}
+
+// Generates a loop nest that iterates over a dense tensor.
+//   for i1 in dim1
+//     ..
+//       for ik in dimk
+//         val = a[i1,..,ik]
+//         if val != 0
+//           bodyBuilder(val, [i1,..,ik])
+// TODO: This can be reused by other operator conversions (ReshapeOp,
+// ConvertOp) to reduce code repetition!
+static void genDenseTensorIterationLoop(
+    ConversionPatternRewriter &rewriter, Operation *op, Value t,
+    RankedTensorType tensorTp,
+    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
+  Location loc = op->getLoc();
+  auto enc = getSparseTensorEncoding(tensorTp);
+  assert(!enc && "Generating Dense Tensor Loop on a Sparse Tensor!");
+
+  unsigned rank = tensorTp.getRank();
+  Value zero = constantIndex(rewriter, loc, 0);
+  Value one = constantIndex(rewriter, loc, 1);
+
+  SmallVector<Value> lo;
+  SmallVector<Value> hi;
+  SmallVector<Value> st;
+
+  // Fill out loop iteration information.
+  for (unsigned i = 0; i < rank; i++) {
+    lo.push_back(zero);
+    hi.push_back(linalg::createOrFoldDimOp(rewriter, loc, t, i));
+    st.push_back(one);
+  }
+
+  scf::buildLoopNest(rewriter, op->getLoc(), lo, hi, st, {},
+                     [&](OpBuilder &builder, Location loc, ValueRange ivs,
+                         ValueRange args) -> scf::ValueVector {
+                       // Invoke callback to build the body of the loop.
+                       bodyBuilder(builder, loc, ivs);
+                       return {};
+                     });
+}
+
 //===----------------------------------------------------------------------===//
 // Conversion rules.
//===----------------------------------------------------------------------===// @@ -760,7 +934,8 @@ rewriter.create(loc, cond, before->getArguments()); Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes); rewriter.setInsertionPointToStart(after); - insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, rank, ind); + SmallVector ivs = loadIndices(rewriter, loc, rank, ind); + insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, ivs); rewriter.create(loc); rewriter.setInsertionPointAfter(whileOp); genDelCOOCall(rewriter, op, elemTp, iter); @@ -1043,6 +1218,139 @@ } }; +/// Sparse conversion rule for the concatenate operator. +class SparseTensorConcatConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(ConcatenateOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + // The conversion works as follow: + // (1). When output is sparse, and mix of inputs: + // a_sparse = concat (b_dense, c_sparse, ....) + // => + // coo_for_a = newSparseCOO(shapeOf(a)) + // for i, j, k // dense input + // coo->add(adjustForOffset(i,j,k), b[i,j,k]) + // + // for elem in sparse_input + // coo->add(adjustForOffset(elem.indices), elem.value) + // ... + // a = newSparseTensor(coo_for_a) + // return a + // + // (2). When output is dense, and mix of inputs: + // a_dense = concat (b_dense, c_sparse, ....) + // => + // a = malloc(shapeOf(a)) + // for i, j, k // dense input + // a[ adjustForOffset(i,j,k) ] = b[i,j,k] + // + // for elem in sparse_input + // a[ adjustForOffset(elem.indices) ] = elem.value + // return a + Location loc = op.getLoc(); + auto dstTp = op.getType().cast(); + auto encDst = getSparseTensorEncoding(dstTp); + Type elemTp = dstTp.getElementType(); + uint64_t concatDim = op.getDimension().getZExtValue(); + unsigned rank = dstTp.getRank(); + + Value dst; // destination tensor + Value dstPerm; // destination tensor permutation (if sparse out) + // A pointer to the value being inserted (if dense => sparse) + Value elemPtr; + // Memory that holds the COO for destination tensor (if sparse out) + Value dstIdx; + // The offset applied to the dimenstion to be concated (starting from 0) + Value offset = constantIndex(rewriter, loc, 0); + + SmallVector sizes; + SmallVector params; + concatSizesFromInputs(rewriter, sizes, op, dstTp, op.getInputs(), + concatDim); + + if (encDst) { + // Start a new COO for the destination tensor. + newParams(rewriter, params, op, dstTp, encDst, Action::kEmptyCOO, sizes); + dst = genNewCall(rewriter, op, params); + dstPerm = params[2]; + elemPtr = genAllocaScalar(rewriter, loc, elemTp); + dstIdx = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); + } else { + // TODO: Dense buffers should be allocated/deallocated via the callback + // in BufferizationOptions. 
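+      // For reference (illustration, matching the FileCheck tests added
+      // below): for a static 5x4 result this materializes roughly as
+      //   %buf = memref.alloc() : memref<5x4xf64>
+      //   linalg.fill ins(%zero : f64) outs(%buf : memref<5x4xf64>)
+      // before the inputs are copied in.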
+ dst = allocDenseTensor(rewriter, loc, dstTp, sizes); + } + for (auto it : llvm::zip(op.getInputs(), adaptor.getInputs())) { + Value orignalOp = std::get<0>(it); // Input (with encoding) from Op + Value adaptedOp = std::get<1>(it); // Input (type converted) from adaptor + RankedTensorType srcTp = orignalOp.getType().cast(); + auto encSrc = getSparseTensorEncoding(srcTp); + if (encSrc) { + genSparseCOOIterationLoop( + rewriter, op, adaptedOp, srcTp, + [&](OpBuilder &builder, Location loc, Value idx, + Value elemPtr) -> void { + auto indVec = + loadIndices(builder, loc, rank, idx, concatDim, offset); + if (encDst) { + // Case: sparse => sparse + storeIndices(builder, loc, rank, dstIdx, indVec); + genAddEltCall(builder, op, elemTp, dst, elemPtr, dstIdx, + dstPerm); + } else { + // Case: sparse => dense + insertScalarIntoDenseTensor(builder, loc, elemPtr, dst, indVec); + } + }); + } else { + genDenseTensorIterationLoop( + rewriter, op, adaptedOp, srcTp, + [&](OpBuilder &builder, Location loc, ValueRange idx) -> void { + if (encDst) { + // Case: dense => sparse + storeIndices(builder, loc, rank, dstIdx, idx, concatDim, + offset); + Value val = genValueForDense(builder, loc, adaptedOp, idx); + builder.create(loc, val, elemPtr); + genAddEltCall(builder, op, elemTp, dst, elemPtr, dstIdx, + dstPerm); + } else { + // Case: dense => dense + Value val = genValueForDense(builder, loc, adaptedOp, idx); + SmallVector indVec(idx); + // Apply offset. + indVec[concatDim] = builder.create( + loc, indVec[concatDim], offset); + builder.create(loc, val, dst, indVec); + } + }); + } + // Accumulate offset. + // TODO: avoid calling sparseDimSize multiple times by caching the result! + Value curDim = encSrc ? sizeFromPtrAtDim(rewriter, op, encSrc, srcTp, + adaptedOp, concatDim) + : linalg::createOrFoldDimOp(rewriter, loc, + adaptedOp, concatDim); + + offset = rewriter.create(loc, offset, curDim); + } + if (encDst) { + params[6] = constantAction(rewriter, loc, Action::kFromCOO); + // In sparse output case, the destination holds the COO. + Value coo = dst; + params[7] = coo; + dst = genNewCall(rewriter, op, params); + // Release resources. + genDelCOOCall(rewriter, op, elemTp, coo); + rewriter.replaceOp(op, dst); + } else { + rewriter.replaceOpWithNewOp(op, dstTp, dst); + } + return success(); + } +}; /// Sparse conversion rule for the output operator. 
class SparseTensorOutConverter : public OpConversionPattern { public: @@ -1099,12 +1407,13 @@ SparseCastConverter, SparseTensorNewConverter, SparseReshapeConverter, SparseReshapeConverter, - SparseTensorAllocConverter, SparseTensorDeallocConverter, - SparseTensorToPointersConverter, SparseTensorToIndicesConverter, - SparseTensorToValuesConverter, SparseTensorLoadConverter, - SparseTensorLexInsertConverter, SparseTensorExpandConverter, - SparseTensorCompressConverter, SparseTensorOutConverter>( - typeConverter, patterns.getContext()); + SparseTensorConcatConverter, SparseTensorAllocConverter, + SparseTensorDeallocConverter, SparseTensorToPointersConverter, + SparseTensorToIndicesConverter, SparseTensorToValuesConverter, + SparseTensorLoadConverter, SparseTensorLexInsertConverter, + SparseTensorExpandConverter, SparseTensorCompressConverter, + SparseTensorOutConverter>(typeConverter, patterns.getContext()); + patterns.add(typeConverter, patterns.getContext(), options); } diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -0,0 +1,360 @@ +// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s + +#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +#SparseMatrix_P = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +// CHECK-LABEL: func.func @concat_mix_dense( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64> +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>) +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64> +// CHECK: %[[TMP_13:.*]] = arith.cmpf une, %[[TMP_12]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_13]] { +// CHECK: memref.store %[[TMP_12]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<5x4xf64> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.alloca() : 
memref<2xindex> +// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref +// CHECK: %[[TMP_10:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_12:.*]] = func.call @getNextF64(%[[TMP_7]], %[[TMP_9]], %[[TMP_10]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_12]]) +// CHECK: } do { +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_13:.*]] = arith.addi %[[TMP_12]], %[[TMP_c2]] : index +// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = memref.load %[[TMP_10]][] : memref +// CHECK: memref.store %[[TMP_15]], %[[TMP_0]][%[[TMP_13]], %[[TMP_14]]] : memref<5x4xf64> +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_11:.*]] = bufferization.to_tensor %[[TMP_0]] : memref<5x4xf64> +// CHECK: return %[[TMP_11]] : tensor<5x4xf64> +// CHECK: } +func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64> + return %0 : tensor<5x4xf64> +} + +// CHECK-LABEL: func.func @concat_mix_sparse( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c2_i32:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c4_i32:.*]] = arith.constant 4 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: 
memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref +// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: memref.store %[[TMP_arg2]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_arg3]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_22:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64> +// CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_23]] { +// CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref +// CHECK: %[[TMP_20:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_22:.*]] = func.call @getNextF64(%[[TMP_17]], %[[TMP_19]], %[[TMP_20]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_22]]) +// CHECK: } do { +// CHECK: %[[TMP_22:.*]] = memref.load %[[TMP_18]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_23:.*]] = arith.addi %[[TMP_22]], %[[TMP_c2]] : index +// CHECK: %[[TMP_24:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_23]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_17]]) : (!llvm.ptr) -> () +// 
CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[TMP_21]] : !llvm.ptr +// CHECK: } +func.func @concat_mix_sparse(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64, #SparseMatrix> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64, #SparseMatrix> + return %0 : tensor<5x4xf64, #SparseMatrix> +} + +// CHECK-LABEL: func.func @concat_mix_sparse_perm_dim1( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<4x2xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c2_i32:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c4_i32:.*]] = arith.constant 4 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref +// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: memref.store %[[TMP_arg2]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_arg3]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_22:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> +// CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_23]] { +// CHECK: memref.store %[[TMP_22]], 
%[[TMP_8]][] : memref +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref +// CHECK: %[[TMP_20:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_22:.*]] = func.call @getNextF64(%[[TMP_17]], %[[TMP_19]], %[[TMP_20]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_22]]) +// CHECK: } do { +// CHECK: %[[TMP_22:.*]] = memref.load %[[TMP_18]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index +// CHECK: memref.store %[[TMP_22]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_17]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[TMP_21]] : !llvm.ptr +// CHECK: } +func.func @concat_mix_sparse_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #SparseMatrix_P>) -> tensor<4x5xf64, #SparseMatrix_P> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #SparseMatrix_P> to tensor<4x5xf64, #SparseMatrix_P> + return %0 : tensor<4x5xf64, #SparseMatrix_P> +} + +// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<4x2xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1_i8:.*]] = 
arith.constant 1 : i8 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64> +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>) +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> +// CHECK: %[[TMP_13:.*]] = arith.cmpf une, %[[TMP_12]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_13]] { +// CHECK: memref.store %[[TMP_12]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<4x5xf64> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref +// CHECK: %[[TMP_10:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_12:.*]] = func.call @getNextF64(%[[TMP_7]], %[[TMP_9]], %[[TMP_10]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_12]]) +// CHECK: } do { +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = arith.addi %[[TMP_13]], %[[TMP_c2]] : index +// CHECK: %[[TMP_15:.*]] = memref.load %[[TMP_10]][] : memref +// CHECK: memref.store %[[TMP_15]], %[[TMP_0]][%[[TMP_12]], %[[TMP_14]]] : memref<4x5xf64> +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_11:.*]] = bufferization.to_tensor %[[TMP_0]] : memref<4x5xf64> +// CHECK: return %[[TMP_11]] : tensor<4x5xf64> +// CHECK: } +func.func @concat_mix_dense_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #SparseMatrix_P>) -> tensor<4x5xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #SparseMatrix_P> to tensor<4x5xf64> + return %0 : tensor<4x5xf64> +} + +// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1_dyn( +// CHECK-SAME: %[[TMP_arg0:.*]]: 
tensor<3x2xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64> +// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>) +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64> +// CHECK: %[[TMP_14:.*]] = arith.cmpf une, %[[TMP_13]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_14]] { +// CHECK: memref.store %[[TMP_13]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<3x5xf64> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = memref.cast %[[TMP_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_6]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_6]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[TMP_3]], %[[TMP_5]], %[[TMP_7]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref +// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_13:.*]] = func.call @getNextF64(%[[TMP_8]], %[[TMP_10]], %[[TMP_11]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_13]]) +// CHECK: } do { +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = arith.addi %[[TMP_14]], %[[TMP_c2]] : index +// CHECK: %[[TMP_16:.*]] = memref.load %[[TMP_11]][] : memref +// CHECK: memref.store %[[TMP_16]], %[[TMP_0]][%[[TMP_13]], %[[TMP_15]]] : memref<3x5xf64> +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_8]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_12:.*]] = bufferization.to_tensor %[[TMP_1]] : memref +// CHECK: return %[[TMP_12]] : tensor +// CHECK: } +// CHECK: } +func.func @concat_mix_dense_perm_dim1_dyn(%arg0: 
tensor<3x2xf64>, %arg1: tensor<3x3xf64, #SparseMatrix>) -> tensor { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index} + : tensor<3x2xf64>, tensor<3x3xf64, #SparseMatrix> to tensor + return %0 : tensor +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir @@ -0,0 +1,430 @@ +// RUN: mlir-opt %s --sparse-compiler | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#MAT_C_C = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +#MAT_D_C = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> +#MAT_C_D = #sparse_tensor.encoding<{dimLevelType = ["compressed", "dense"]}> + +#MAT_C_C_P = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +#MAT_C_D_P = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "dense" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +#MAT_D_C_P = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +module { + // + // Tests without permutation. + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #MAT_C_C>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C> + return %0 : tensor<9x4xf64, #MAT_C_C> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. + func.func @concat_sparse_dense(%arg0: tensor<2x4xf64, #MAT_C_C>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C> + return %0 : tensor<9x4xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // + // Tests with permutation. + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. 
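+  // (#MAT_C_C_P differs from #MAT_C_C only in its permuted (j,i) dimOrdering,
+  // so the result below holds the same values as @concat_sparse_sparse, just
+  // stored in permuted order, as the driver checks further down confirm.)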
+ func.func @concat_sparse_sparse_perm(%arg0: tensor<2x4xf64, #MAT_C_C_P>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C_P> + return %0 : tensor<9x4xf64, #MAT_C_C_P> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. + func.func @concat_sparse_dense_perm(%arg0: tensor<2x4xf64, #MAT_C_C_P>, %arg1: tensor<3x4xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse_perm(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C> + return %0 : tensor<9x4xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense_perm(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // + // Tests without perumutation (concatenate on dimension 1) + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse_dim1(%arg0: tensor<4x2xf64, #MAT_C_C>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C> + return %0 : tensor<4x9xf64, #MAT_C_C> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. 
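+  // (Along dimension 1 the 2 + 3 + 4 input columns add up to the 9 columns
+  // of the result.)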
+ func.func @concat_sparse_dense_dim1(%arg0: tensor<4x2xf64, #MAT_C_C>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C> + return %0 : tensor<4x9xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // + // Tests with perumutation (concatenate on dimension 1) + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse_perm_dim1(%arg0: tensor<4x2xf64, #MAT_C_C_P>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C_P> + return %0 : tensor<4x9xf64, #MAT_C_C_P> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. 
+ func.func @concat_sparse_dense_perm_dim1(%arg0: tensor<4x2xf64, #MAT_C_C_P>, %arg1: tensor<4x3xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C> + return %0 : tensor<4x9xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // + // Concats mix sparse and dense matrices to a sparse matrix (with dynamic sizes) + // + func.func @concat_mix_sparse_dyn(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor + return %0 : tensor + } + + func.func @dump_mat_9x4(%A: tensor<9x4xf64, #MAT_C_C>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C> to tensor<9x4xf64> + %m = bufferization.to_memref %c : memref<9x4xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64> + vector.print %v : vector<9x4xf64> + + %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_perm_9x4(%A: tensor<9x4xf64, #MAT_C_C_P>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C_P> to tensor<9x4xf64> + %m = bufferization.to_memref %c : memref<9x4xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64> + vector.print %v : vector<9x4xf64> + + %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C_P> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_dense_9x4(%A: tensor<9x4xf64>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %m = bufferization.to_memref %A : memref<9x4xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64> + vector.print %v : vector<9x4xf64> + + return + } + + func.func @dump_mat_4x9(%A: tensor<4x9xf64, #MAT_C_C>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C> to tensor<4x9xf64> + %m = bufferization.to_memref %c : memref<4x9xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + %1 = sparse_tensor.values %A : 
tensor<4x9xf64, #MAT_C_C> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_dyn(%A: tensor) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor to tensor + %m = bufferization.to_memref %c : memref + %v = vector.transfer_read %m[%c0, %c0], %du: memref, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + %1 = sparse_tensor.values %A : tensor to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_perm_4x9(%A: tensor<4x9xf64, #MAT_C_C_P>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C_P> to tensor<4x9xf64> + %m = bufferization.to_memref %c : memref<4x9xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C_P> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_dense_4x9(%A: tensor<4x9xf64>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %m = bufferization.to_memref %A : memref<4x9xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + return + } + + // Driver method to call and verify kernels. + func.func @entry() { + %m42 = arith.constant dense< + [ [ 1.0, 0.0 ], + [ 3.1, 0.0 ], + [ 0.0, 2.0 ], + [ 0.0, 0.0 ] ]> : tensor<4x2xf64> + %m43 = arith.constant dense< + [ [ 1.0, 0.0, 1.0 ], + [ 1.0, 0.0, 0.5 ], + [ 0.0, 0.0, 1.0 ], + [ 5.0, 2.0, 0.0 ] ]> : tensor<4x3xf64> + %m24 = arith.constant dense< + [ [ 1.0, 0.0, 3.0, 0.0], + [ 0.0, 2.0, 0.0, 0.0] ]> : tensor<2x4xf64> + %m34 = arith.constant dense< + [ [ 1.0, 0.0, 1.0, 1.0], + [ 0.0, 0.5, 0.0, 0.0], + [ 1.0, 5.0, 2.0, 0.0] ]> : tensor<3x4xf64> + %m44 = arith.constant dense< + [ [ 0.0, 0.0, 1.5, 1.0], + [ 0.0, 3.5, 0.0, 0.0], + [ 1.0, 5.0, 2.0, 0.0], + [ 1.0, 0.5, 0.0, 0.0] ]> : tensor<4x4xf64> + + %sm24cc = sparse_tensor.convert %m24 : tensor<2x4xf64> to tensor<2x4xf64, #MAT_C_C> + %sm34cd = sparse_tensor.convert %m34 : tensor<3x4xf64> to tensor<3x4xf64, #MAT_C_D> + %sm42cc = sparse_tensor.convert %m42 : tensor<4x2xf64> to tensor<4x2xf64, #MAT_C_C> + %sm43cd = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<4x3xf64, #MAT_C_D> + %sm44dc = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<4x4xf64, #MAT_D_C> + + %sm24ccp = sparse_tensor.convert %m24 : tensor<2x4xf64> to tensor<2x4xf64, #MAT_C_C_P> + %sm34cdp = sparse_tensor.convert %m34 : tensor<3x4xf64> to tensor<3x4xf64, #MAT_C_D_P> + %sm42ccp = sparse_tensor.convert %m42 : tensor<4x2xf64> to tensor<4x2xf64, #MAT_C_C_P> + %sm43cdp = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<4x3xf64, #MAT_C_D_P> + %sm44dcp = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<4x4xf64, #MAT_D_C_P> + + %sm43cd_dyn = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor + %sm44dc_dyn = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor + + // CHECK: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1 )
+    %0 = call @concat_sparse_sparse(%sm24cc, %sm34cd, %sm44dc)
+        : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
+    call @dump_mat_9x4(%0) : (tensor<9x4xf64, #MAT_C_C>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    %1 = call @concat_sparse_dense(%sm24cc, %sm34cd, %sm44dc)
+        : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
+    call @dump_mat_dense_9x4(%1) : (tensor<9x4xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %2 = call @concat_mix_sparse(%m24, %sm34cd, %sm44dc)
+        : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
+    call @dump_mat_9x4(%2) : (tensor<9x4xf64, #MAT_C_C>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    %3 = call @concat_mix_dense(%m24, %sm34cd, %sm44dc)
+        : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
+    call @dump_mat_dense_9x4(%3) : (tensor<9x4xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 1, 0, 1, 1, 1, 2, 0, 0.5, 5, 3.5, 5, 0.5, 3, 1, 0, 2, 1.5, 2, 1, 0, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %4 = call @concat_sparse_sparse_perm(%sm24ccp, %sm34cd, %sm44dc)
+        : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P>
+    call @dump_mat_perm_9x4(%4) : (tensor<9x4xf64, #MAT_C_C_P>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    %5 = call @concat_sparse_dense_perm(%sm24ccp, %sm34cdp, %sm44dc)
+        : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
+    call @dump_mat_dense_9x4(%5) : (tensor<9x4xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %6 = call @concat_mix_sparse_perm(%m24, %sm34cdp, %sm44dc)
+        : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
+    call @dump_mat_9x4(%6) : (tensor<9x4xf64, #MAT_C_C>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    %7 = call @concat_mix_dense_perm(%m24, %sm34cd, %sm44dcp)
+        : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64>
+    call @dump_mat_dense_9x4(%7) : (tensor<9x4xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %8 = call @concat_sparse_sparse_dim1(%sm42cc, %sm43cd, %sm44dc)
+        : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
+    call @dump_mat_4x9(%8) : (tensor<4x9xf64, #MAT_C_C>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    %9 = call @concat_sparse_dense_dim1(%sm42cc, %sm43cd, %sm44dc)
+        : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
+    call @dump_mat_dense_4x9(%9) : (tensor<4x9xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %10 = call @concat_mix_sparse_dim1(%m42, %sm43cd, %sm44dc)
+        : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
+    call @dump_mat_4x9(%10) : (tensor<4x9xf64, #MAT_C_C>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    %11 = call @concat_mix_dense_dim1(%m42, %sm43cd, %sm44dc)
+        : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
+    call @dump_mat_dense_4x9(%11) : (tensor<4x9xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 3.1, 2, 1, 1, 0, 5, 0, 0, 0, 2, 1, 0.5, 1, 0, 1, 1, 3.5, 5, 0.5, 1.5, 2, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %12 = call @concat_sparse_sparse_perm_dim1(%sm42ccp, %sm43cd, %sm44dc)
+        : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P>
+    call @dump_mat_perm_4x9(%12) : (tensor<4x9xf64, #MAT_C_C_P>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    %13 = call @concat_sparse_dense_perm_dim1(%sm42ccp, %sm43cdp, %sm44dc)
+        : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
+    call @dump_mat_dense_4x9(%13) : (tensor<4x9xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %14 = call @concat_mix_sparse_perm_dim1(%m42, %sm43cdp, %sm44dc)
+        : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
+    call @dump_mat_4x9(%14) : (tensor<4x9xf64, #MAT_C_C>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    %15 = call @concat_mix_dense_perm_dim1(%m42, %sm43cd, %sm44dcp)
+        : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64>
+    call @dump_mat_dense_4x9(%15) : (tensor<4x9xf64>) -> ()
+
+    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    %16 = call @concat_mix_sparse_dyn(%m42, %sm43cd, %sm44dc)
+        : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<?x?xf64, #MAT_C_C>
+    call @dump_mat_dyn(%16) : (tensor<?x?xf64, #MAT_C_C>) -> ()
+
+    // Release resources.
+    bufferization.dealloc_tensor %sm24cc : tensor<2x4xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %sm34cd : tensor<3x4xf64, #MAT_C_D>
+    bufferization.dealloc_tensor %sm42cc : tensor<4x2xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %sm43cd : tensor<4x3xf64, #MAT_C_D>
+    bufferization.dealloc_tensor %sm44dc : tensor<4x4xf64, #MAT_D_C>
+    bufferization.dealloc_tensor %sm24ccp : tensor<2x4xf64, #MAT_C_C_P>
+    bufferization.dealloc_tensor %sm34cdp : tensor<3x4xf64, #MAT_C_D_P>
+    bufferization.dealloc_tensor %sm42ccp : tensor<4x2xf64, #MAT_C_C_P>
+    bufferization.dealloc_tensor %sm43cdp : tensor<4x3xf64, #MAT_C_D_P>
+    bufferization.dealloc_tensor %sm44dcp : tensor<4x4xf64, #MAT_D_C_P>
+    bufferization.dealloc_tensor %0 : tensor<9x4xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %1 : tensor<9x4xf64>
+    bufferization.dealloc_tensor %2 : tensor<9x4xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %3 : tensor<9x4xf64>
+    bufferization.dealloc_tensor %4 : tensor<9x4xf64, #MAT_C_C_P>
+    bufferization.dealloc_tensor %5 : tensor<9x4xf64>
+    bufferization.dealloc_tensor %6 : tensor<9x4xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %7 : tensor<9x4xf64>
+    bufferization.dealloc_tensor %8 : tensor<4x9xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %9 : tensor<4x9xf64>
+    bufferization.dealloc_tensor %10 : tensor<4x9xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %11 : tensor<4x9xf64>
+    bufferization.dealloc_tensor %12 : tensor<4x9xf64, #MAT_C_C_P>
+    bufferization.dealloc_tensor %13 : tensor<4x9xf64>
+    bufferization.dealloc_tensor %14 : tensor<4x9xf64, #MAT_C_C>
+    bufferization.dealloc_tensor %15 : tensor<4x9xf64>
+    bufferization.dealloc_tensor %16 : tensor<?x?xf64, #MAT_C_C>
+    return
+  }
+}