diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -63,6 +63,12 @@
 /// of the tensor.
 unsigned getCOOStart(SparseTensorEncodingAttr enc);
 
+/// Helpers to set up a COO type.
+RankedTensorType getCOOFromTypeWithOrdering(RankedTensorType src,
+                                            AffineMap ordering, bool ordered);
+
+RankedTensorType getCOOFromType(RankedTensorType src, bool ordered);
+
 //
 // Dimension level types.
 //
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -58,6 +58,47 @@
   let hasVerifier = 1;
 }
 
+def SparseTensor_PackOp : SparseTensor_Op<"pack">,
+    Arguments<(ins AnyRankedTensor:$data,
+                   AnyRankedTensor:$indices)>,
+    Results<(outs AnySparseTensor: $result)> {
+  let summary = "Returns a sparse tensor from the given (data, indices) pair";
+
+  let description = [{
+    Packs the data/indices into a COO sparse tensor. The coordinates in
+    `indices` shall not exceed the dimension sizes of the returned sparse
+    tensor. Note that the returned tensor must be statically shaped because
+    it is impossible to infer the shape from sparse coordinates.
+
+    `$indices`: stored via a 2-D tensor of integer elements with shape
+    [N, ndims], which specifies the indices of the elements in the sparse
+    tensor that contain non-zero values.
+
+    `$data`: stored via a 1-D tensor with shape [N], that supplies the
+    corresponding values for the indices.
+
+    The operation can be used to materialize a sparse tensor from external
+    sources. E.g., when passing from Python as two numpy arrays for data
+    and indices.
+
+    Example:
+    ```mlir
+    %data    = arith.constant dense<[1.0, 5.0]> : tensor<2xf64>
+    %indices = arith.constant dense<[[0, 0], [1, 2]]> : tensor<2x2xindex>
+
+    %st = sparse_tensor.pack %data, %indices : tensor<2xf64>, tensor<2x2xindex>
+                                            to tensor<3x4xf64, #COO>
+    // %st = [[1, 0, 0, 0],
+    //        [0, 0, 5, 0],
+    //        [0, 0, 0, 0]]
+    ```
+  }];
+
+  let assemblyFormat = "$data `,` $indices attr-dict `:` type($data) `,` type($indices)"
+                       "`to` type($result)";
+
+  let hasVerifier = 1;
+}
+
 def SparseTensor_ConvertOp : SparseTensor_Op<"convert",
     [Pure, SameOperandsAndResultElementType]>,
     Arguments<(ins AnyTensor:$source)>,
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -15,6 +15,7 @@
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/PatternMatch.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
 
@@ -440,6 +441,44 @@
   return rank;
 }
 
+// Helpers to set up a COO type.
+RankedTensorType sparse_tensor::getCOOFromTypeWithOrdering(RankedTensorType src,
+                                                           AffineMap ordering,
+                                                           bool ordered) {
+  auto *ctx = src.getContext();
+  auto rank = src.getRank();
+  SmallVector<DimLevelType> dims;
+
+  // An unordered and non-unique compressed dim at beginning.
+  // If this is also the last dimension, then it is unique.
+  dims.push_back(*getDimLevelType(LevelFormat::Compressed, ordered, rank == 1));
+  if (rank > 1) {
+    // TODO: it is actually ordered at the level for ordered input.
+    // Followed by unordered non-unique n-2 singleton levels.
+    std::fill_n(std::back_inserter(dims), rank - 2,
+                *getDimLevelType(LevelFormat::Singleton, ordered, false));
+    // Ends with a unique singleton level unless the tensor rank is 1.
+    dims.push_back(*getDimLevelType(LevelFormat::Singleton, ordered, true));
+  }
+
+  SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(src);
+  // TODO: Maybe pick the bitwidth based on input/output tensors (probably the
+  // largest one among them) in the original operation instead of using the
+  // default value.
+  unsigned pointerBitWidth = encSrc ? encSrc.getPointerBitWidth() : 0;
+  unsigned indexBitWidth = encSrc ? encSrc.getIndexBitWidth() : 0;
+  auto enc = SparseTensorEncodingAttr::get(ctx, dims, ordering, AffineMap(),
+                                           pointerBitWidth, indexBitWidth);
+  return RankedTensorType::get(src.getShape(), src.getElementType(), enc);
+}
+
+RankedTensorType sparse_tensor::getCOOFromType(RankedTensorType src,
+                                               bool ordered) {
+  return getCOOFromTypeWithOrdering(
+      src, AffineMap::getMultiDimIdentityMap(src.getRank(), src.getContext()),
+      ordered);
+}
+
 uint64_t mlir::sparse_tensor::toOrigDim(SparseTensorEncodingAttr enc,
                                         uint64_t d) {
   if (enc) {
@@ -575,6 +614,42 @@
   return success();
 }
 
+LogicalResult PackOp::verify() {
+  TensorType dataTp = getData().getType(), idxTp = getIndices().getType();
+  TensorType retTp = getResult().getType();
+
+  if (!isUniqueCOOType(retTp.cast<RankedTensorType>()))
+    return emitError("must be packed into a COO tensor");
+
+  if (!retTp.hasStaticShape() || !dataTp.hasStaticShape() ||
+      !idxTp.hasStaticShape())
+    return emitError("all input types must be statically shaped");
+
+  if (dataTp.getRank() != 1 || idxTp.getRank() != 2) {
+    return emitError(
+        "requires rank 1 tensor for value and rank 2 tensor for indices");
+  }
+
+  auto enc = getSparseTensorEncoding(retTp);
+  if (idxTp.getElementType() != enc.getIndexType() ||
+      dataTp.getElementType() != retTp.getElementType())
+    return emitError("unmatched type between input and output");
+
+  auto dNOE = dataTp.getShape()[0];
+  auto iNOE = idxTp.getShape()[0];
+  if (!ShapedType::isDynamic(dNOE) && !ShapedType::isDynamic(iNOE) &&
+      dNOE != iNOE)
+    return emitError("unmatched number of elements in data and indices");
+
+  // The second dimension of the indices tensor gives the rank of the
+  // packed COO, which must agree with the rank of the result tensor.
+  auto inRank = idxTp.getShape()[1];
+  auto ouRank = retTp.getRank();
+  if (!ShapedType::isDynamic(inRank) && inRank != ouRank)
+    return emitError("unmatched rank between input and output");
+
+  return success();
+}
+
 LogicalResult ConvertOp::verify() {
   if (auto tp1 = getSource().getType().dyn_cast<RankedTensorType>()) {
     if (auto tp2 = getDest().getType().dyn_cast<RankedTensorType>()) {
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -151,40 +151,13 @@
 
 // TODO: The dim level property of the COO type relies on input tensors, the
 // shape relies on the output tensor
-// Helpers to setup a COO type.
 static RankedTensorType
 getUnorderedCOOFromTypeWithOrdering(RankedTensorType src, AffineMap ordering) {
-  auto *ctx = src.getContext();
-  auto rank = src.getRank();
-  SmallVector<DimLevelType> dims;
-
-  // An unordered and non-unique compressed dim at beginning.
-  dims.push_back(DimLevelType::CompressedNuNo);
-
-  if (rank > 1) {
-    // TODO: it is actually ordered at the level for ordered input.
-    // Followed by unordered non-unique n-2 singleton levels.
-    std::fill_n(std::back_inserter(dims), rank - 2,
-                DimLevelType::SingletonNuNo);
-    // TODO: only if all the inputs (for concatentate) are unique at the last
-    // level should the COO has a unique level at the end. Ends by a unordered
-    // unique singleton level unless the tensor rank is 1.
-    dims.push_back(DimLevelType::SingletonNo);
-  }
-  SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(src);
-  // TODO: Maybe pick the bitwidth based on input/output tensors (probably the
-  // largest one among them) in the original operation instead of using the
-  // default value.
-  unsigned pointerBitWidth = encSrc ? encSrc.getPointerBitWidth() : 0;
-  unsigned indexBitWidth = encSrc ? encSrc.getIndexBitWidth() : 0;
-  auto enc = SparseTensorEncodingAttr::get(ctx, dims, ordering, AffineMap(),
-                                           pointerBitWidth, indexBitWidth);
-  return RankedTensorType::get(src.getShape(), src.getElementType(), enc);
+  return getCOOFromTypeWithOrdering(src, ordering, false);
 }
 
 static RankedTensorType getUnorderedCOOFromType(RankedTensorType src) {
-  return getUnorderedCOOFromTypeWithOrdering(
-      src, AffineMap::getMultiDimIdentityMap(src.getRank(), src.getContext()));
+  return getCOOFromType(src, false);
 }
 
 /// Collects the dynamic dimension sizes for `tp` with the assumption that
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -8,6 +8,66 @@
 
 // -----
 
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @non_static_pack_ret(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<?xf64, #SparseVector> {
+  // expected-error@+1 {{all input types must be statically shaped}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<?xf64, #SparseVector>
+  return %0 : tensor<?xf64, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_data(%data: tensor<6x1xf64>, %index: tensor<6x1xi32>)
+                          -> tensor<100xf64, #SparseVector> {
+  // expected-error@+1 {{requires rank 1 tensor for value and rank 2 tensor for indices}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6x1xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_type(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                          -> tensor<100xf32, #SparseVector> {
+  // expected-error@+1 {{unmatched type between input and output}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<100xf32, #SparseVector>
+  return %0 : tensor<100xf32, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_type(%data: tensor<5xf64>, %index: tensor<6x1xi32>)
+                          -> tensor<100xf64, #SparseVector> {
+  // expected-error@+1 {{unmatched number of elements in data and indices}}
+  %0 = sparse_tensor.pack %data, %index : tensor<5xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_type(%data: tensor<6xf64>, %index: tensor<6x2xi32>)
+                          -> tensor<100xf64, #SparseVector> {
+  // expected-error@+1 {{unmatched rank between input and output}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x2xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
 func.func @invalid_pointers_dense(%arg0: tensor<128xf64>) -> memref<?xf64> {
   // expected-error@+1 {{'sparse_tensor.pointers' op operand #0 must be sparse tensor of any type values, but got 'tensor<128xf64>'}}
   %0 = sparse_tensor.pointers %arg0 { dimension = 0 : index } : tensor<128xf64> to memref<?xf64>
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -13,6 +13,22 @@
 
 // -----
 
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+// CHECK-LABEL: func @sparse_pack(
+// CHECK-SAME: %[[D:.*]]: tensor<6xf64>,
+// CHECK-SAME: %[[I:.*]]: tensor<6x1xi32>)
+// CHECK: %[[R:.*]] = sparse_tensor.pack %[[D]], %[[I]]
+// CHECK: return %[[R]] : tensor<100xf64, #{{.*}}>
+func.func @sparse_pack(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                    -> tensor<100xf64, #SparseVector> {
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
 #SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
 
 // CHECK-LABEL: func @sparse_new_symmetry(