diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -157,6 +157,31 @@
   SingletonNuNo = 19, // 0b100_11
 };
 
+/// This enum defines all the sparse representations supportable by
+/// the SparseTensor dialect. Unlike DimLevelType, it does not encode level
+/// properties, which are irrelevant to the sparse tensor storage scheme.
+enum class StorageLevelType : uint8_t {
+  Undef = 0,       // 0b000_00
+  Dense = 4,       // 0b001_00
+  Compressed = 8,  // 0b010_00
+  Singleton = 16,  // 0b100_00
+};
+
+/// Returns the string representation of the given storage level type.
+inline std::string toMLIRString(StorageLevelType slt) {
+  switch (slt) {
+  case StorageLevelType::Undef:
+    return "\"undef\"";
+  case StorageLevelType::Dense:
+    return "\"dense\"";
+  case StorageLevelType::Compressed:
+    return "\"compressed\"";
+  case StorageLevelType::Singleton:
+    return "\"singleton\"";
+  }
+  return "";
+}
+
 /// Returns string representation of the given dimension level type.
 inline std::string toMLIRString(DimLevelType dlt) {
   switch (dlt) {
@@ -231,6 +256,39 @@
   return !(static_cast<uint8_t>(dlt) & 1);
 }
 
+/// Converts a DimLevelType to its corresponding StorageLevelType by
+/// clearing the two level-property bits.
+constexpr StorageLevelType fromDLT(DimLevelType dlt) {
+  return static_cast<StorageLevelType>(static_cast<uint8_t>(dlt) & ~3);
+}
+
+/// Ensure both StorageLevelType and DimLevelType have the same underlying
+/// type.
+static_assert(
+    std::is_same_v<std::underlying_type_t<StorageLevelType>, uint8_t>);
+static_assert(std::is_same_v<std::underlying_type_t<DimLevelType>, uint8_t>);
+
+/// Ensure both StorageLevelType and DimLevelType use the same value to
+/// encode the same level type.
+static_assert(static_cast<uint8_t>(StorageLevelType::Dense) ==
+              static_cast<uint8_t>(DimLevelType::Dense));
+static_assert(static_cast<uint8_t>(StorageLevelType::Compressed) ==
+              static_cast<uint8_t>(DimLevelType::Compressed));
+static_assert(static_cast<uint8_t>(StorageLevelType::Singleton) ==
+              static_cast<uint8_t>(DimLevelType::Singleton));
+
+/// Ensure the above conversion works as intended.
+static_assert(
+    (fromDLT(DimLevelType::Undef) == StorageLevelType::Undef &&
+     fromDLT(DimLevelType::Dense) == StorageLevelType::Dense &&
+     fromDLT(DimLevelType::Compressed) == StorageLevelType::Compressed &&
+     fromDLT(DimLevelType::CompressedNu) == StorageLevelType::Compressed &&
+     fromDLT(DimLevelType::CompressedNo) == StorageLevelType::Compressed &&
+     fromDLT(DimLevelType::CompressedNuNo) == StorageLevelType::Compressed &&
+     fromDLT(DimLevelType::Singleton) == StorageLevelType::Singleton &&
+     fromDLT(DimLevelType::SingletonNu) == StorageLevelType::Singleton &&
+     fromDLT(DimLevelType::SingletonNo) == StorageLevelType::Singleton &&
+     fromDLT(DimLevelType::SingletonNuNo) == StorageLevelType::Singleton),
+    "fromDLT conversion is broken");
+
 // Ensure the above predicates work as intended.
 static_assert((isValidDLT(DimLevelType::Undef) &&
                isValidDLT(DimLevelType::Dense) &&
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -23,6 +23,40 @@
                         list<Trait> traits = []>
     : AttrDef<SparseTensor_Dialect, name, traits>;
 
+// Sparse tensor storage attribute.
+def SparseStorageEncodingAttr : SparseTensor_Attr<"SparseStorageEncoding",
+    [ DeclareAttrInterfaceMethods<VerifiableTensorEncoding> ] > {
+  let mnemonic = "storage";
+  let description = [{
+    An attribute encoding TACO-style information on the sparsity properties
+    of tensors, restricted to the properties that are relevant to the
+    sparse tensor storage layout.
+
+    See `SparseTensorEncodingAttr` for more details.
+  }];
+
+  // Data in sparse tensor encoding.
+  let parameters = (
+    ins
+    // A storage level type for each dimension of the tensor type.
+    ArrayRefParameter<
+      "::mlir::sparse_tensor::StorageLevelType",
+      "per dimension storage level type"
+      >: $storageLevelType,
+
+    // The required bit width for pointer storage.
+    "unsigned":$pointerBitWidth,
+    // The required bit width for index storage.
+    "unsigned":$indexBitWidth
+  );
+
+  let extraClassDeclaration = [{
+    // Builds a SparseStorageEncodingAttr from a SparseTensorEncodingAttr.
+    static SparseStorageEncodingAttr fromTensorEncodingAttr(SparseTensorEncodingAttr attr);
+  }];
+
+  let hasCustomAssemblyFormat = 1;
+}
+
 // Sparse tensor encoding attribute.
 def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     [ DeclareAttrInterfaceMethods<VerifiableTensorEncoding> ] > {
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
@@ -50,12 +50,15 @@
     ```
   }];
 
-  let parameters = (ins SparseTensorEncodingAttr : $encoding);
+  let parameters = (ins SparseStorageEncodingAttr : $encoding);
   let builders = [
-    TypeBuilderWithInferredContext<(ins "SparseTensorEncodingAttr":$encoding), [{
+    TypeBuilderWithInferredContext<(ins "SparseStorageEncodingAttr":$encoding), [{
       assert(encoding && "sparse tensor encoding should not be null");
       return $_get(encoding.getContext(), encoding);
     }]>,
+    TypeBuilderWithInferredContext<(ins "SparseTensorEncodingAttr":$encoding), [{
+      return get(SparseStorageEncodingAttr::fromTensorEncodingAttr(encoding));
+    }]>,
     TypeBuilderWithInferredContext<(ins "Type":$type), [{
       return get(getSparseTensorEncoding(type));
     }]>,
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -45,6 +45,114 @@
   }
 }
 
+Attribute SparseStorageEncodingAttr::parse(AsmParser &parser, Type type) {
+  if (failed(parser.parseLess()))
+    return {};
+  // Parse the data as a dictionary.
+  DictionaryAttr dict;
+  if (failed(parser.parseAttribute(dict)))
+    return {};
+  if (failed(parser.parseGreater()))
+    return {};
+  // Process the data from the parsed dictionary value into struct-like data.
+  SmallVector<StorageLevelType> slt;
+  unsigned ptr = 0;
+  unsigned ind = 0;
+  for (const NamedAttribute &attr : dict) {
+    if (attr.getName() == "storageLevelType") {
+      auto arrayAttr = attr.getValue().dyn_cast<ArrayAttr>();
+      if (!arrayAttr) {
+        parser.emitError(parser.getNameLoc(),
+                         "expected an array for storage level types");
+        return {};
+      }
+      for (auto i : arrayAttr) {
+        auto strAttr = i.dyn_cast<StringAttr>();
+        if (!strAttr) {
+          parser.emitError(parser.getNameLoc(),
+                           "expected a string value in storage level types");
+          return {};
+        }
+        auto strVal = strAttr.getValue();
+        if (strVal == "undef") {
+          slt.push_back(StorageLevelType::Undef);
+        } else if (strVal == "dense") {
+          slt.push_back(StorageLevelType::Dense);
+        } else if (strVal == "compressed") {
+          slt.push_back(StorageLevelType::Compressed);
+        } else if (strVal == "singleton") {
+          slt.push_back(StorageLevelType::Singleton);
+        } else {
+          parser.emitError(parser.getNameLoc(),
+                           "unexpected storage level type: ")
+              << strVal;
+          return {};
+        }
+      }
+    } else if (attr.getName() == "pointerBitWidth") {
+      auto intAttr = attr.getValue().dyn_cast<IntegerAttr>();
+      if (!intAttr) {
+        parser.emitError(parser.getNameLoc(),
+                         "expected an integral pointer bitwidth");
+        return {};
+      }
+      ptr = intAttr.getInt();
+    } else if (attr.getName() == "indexBitWidth") {
+      auto intAttr = attr.getValue().dyn_cast<IntegerAttr>();
+      if (!intAttr) {
+        parser.emitError(parser.getNameLoc(),
+                         "expected an integral index bitwidth");
+        return {};
+      }
+      ind = intAttr.getInt();
+    } else {
+      parser.emitError(parser.getNameLoc(), "unexpected key: ")
+          << attr.getName().strref();
+      return {};
+    }
+  }
+  // Construct struct-like storage for attribute.
+  return parser.getChecked<SparseStorageEncodingAttr>(parser.getContext(),
+                                                      slt, ptr, ind);
+}
+
+void SparseStorageEncodingAttr::print(AsmPrinter &printer) const {
+  // Print the struct-like storage in dictionary fashion.
+  printer << "<{ storageLevelType = [ ";
+  for (unsigned i = 0, e = getStorageLevelType().size(); i < e; i++) {
+    printer << toMLIRString(getStorageLevelType()[i]);
+    if (i != e - 1)
+      printer << ", ";
+  }
+  printer << " ]";
+  if (getPointerBitWidth())
+    printer << ", pointerBitWidth = " << getPointerBitWidth();
+  if (getIndexBitWidth())
+    printer << ", indexBitWidth = " << getIndexBitWidth();
+  printer << " }>";
+}
+
+LogicalResult SparseStorageEncodingAttr::verifyEncoding(
+    ArrayRef<int64_t> shape, Type elementType,
+    ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) const {
+  if (!acceptBitWidth(getPointerBitWidth()))
+    return emitError() << "unexpected pointer bitwidth: "
+                       << getPointerBitWidth();
+  if (!acceptBitWidth(getIndexBitWidth()))
+    return emitError() << "unexpected index bitwidth: " << getIndexBitWidth();
+  return success();
+}
+
+SparseStorageEncodingAttr SparseStorageEncodingAttr::fromTensorEncodingAttr(
+    SparseTensorEncodingAttr attr) {
+  SmallVector<StorageLevelType> slt;
+  for (auto dlt : attr.getDimLevelType())
+    slt.push_back(fromDLT(dlt));
+
+  return SparseStorageEncodingAttr::get(attr.getContext(), slt,
+                                        attr.getPointerBitWidth(),
+                                        attr.getIndexBitWidth());
+}
+
 Type SparseTensorEncodingAttr::getPointerType() const {
   unsigned ptrWidth = getPointerBitWidth();
   Type indexType = IndexType::get(getContext());
@@ -228,8 +336,9 @@
                    getHigherOrdering(), getPointerBitWidth(),
                    getIndexBitWidth())))
     return failure();
-  // Check integrity with tensor type specifics. Dimension ordering is optional,
-  // but we always should have dimension level types for the full rank.
+  // Check integrity with tensor type specifics. Dimension ordering is
+  // optional, but we should always have dimension level types for the full
+  // rank.
   unsigned size = shape.size();
   if (size == 0)
     return emitError() << "expected non-scalar sparse tensor";
@@ -259,8 +368,6 @@
 mlir::sparse_tensor::getSparseTensorEncoding(Type type) {
   if (auto ttp = type.dyn_cast<RankedTensorType>())
     return ttp.getEncoding().dyn_cast_or_null<SparseTensorEncodingAttr>();
-  if (auto mdtp = type.dyn_cast<StorageSpecifierType>())
-    return mdtp.getEncoding();
   return nullptr;
 }
 
@@ -277,8 +384,8 @@
     if (!isSingletonDim(tp, i))
       return false;
 
-  // This works for rank == 1 (unique the only compressed) and rank > 1 (unique
-  // on the last singleton).
+  // This works for rank == 1 (unique the only compressed) and rank > 1
+  // (unique on the last singleton).
   return isUniqueDim(tp, tp.getRank() - 1);
 }
 
@@ -379,7 +486,7 @@
   }
 
   auto enc = md.getType().getEncoding();
-  ArrayRef<DimLevelType> dlts = enc.getDimLevelType();
+  ArrayRef<StorageLevelType> dlts = enc.getStorageLevelType();
   unsigned rank = dlts.size();
 
   if (mdKind != StorageSpecifierKind::ValMemSize) {
@@ -390,7 +497,8 @@
     if (d >= rank)
       return op->emitError("requested dimension out of bound");
 
-    if (mdKind == StorageSpecifierKind::PtrMemSize && isSingletonDLT(dlts[d]))
+    if (mdKind == StorageSpecifierKind::PtrMemSize &&
+        dlts[d] == StorageLevelType::Singleton)
       return op->emitError(
           "requested pointer memory size on a singleton level");
   }
@@ -412,8 +520,9 @@
   auto shape1 = tp1.getShape();
   auto shape2 = tp2.getShape();
   // Accept size matches between the source and the destination type
-  // (e.g. 10 vs. 10, 10 vs. ?, or ? vs. ?), but reject direct mismatches or
-  // matches that would need a runtime assert (e.g. 10 vs. 20 or ? vs. 10).
+  // (e.g. 10 vs. 10, 10 vs. ?, or ? vs. ?), but reject direct mismatches
+  // or matches that would need a runtime assert (e.g. 10 vs. 20 or ? vs.
+  // 10).
   for (unsigned d = 0, rank = tp1.getRank(); d < rank; d++)
     if (shape1[d] != shape2[d] && shape2[d] != ShapedType::kDynamic)
       return emitError("unexpected conversion mismatch in dimension ") << d;
@@ -646,9 +755,10 @@
     // If all dimension are statically known, the sum of all the input
     // dimensions should be equal to the output dimension.
     if (sumDim != dstDim)
-      return emitError(
-          "The concatenation dimension of the output tensor should be the "
-          "sum of all the concatenation dimensions of the input tensors.");
+      return emitError("The concatenation dimension of the output tensor "
+                       "should be the sum of all the concatenation "
+                       "dimensions of the input tensors.");
   }
 } else {
   int64_t prev = dstDim;
diff --git a/mlir/test/Dialect/SparseTensor/fold.mlir b/mlir/test/Dialect/SparseTensor/fold.mlir
--- a/mlir/test/Dialect/SparseTensor/fold.mlir
+++ b/mlir/test/Dialect/SparseTensor/fold.mlir
@@ -46,6 +46,8 @@
   return
 }
 
+#SparseVectorStorage = #sparse_tensor.storage<{storageLevelType = ["compressed"]}>
+
 // CHECK-LABEL: func @sparse_get_specifier_dce_fold(
 //  CHECK-SAME: %[[A0:.*]]: !sparse_tensor.storage_specifier
 //  CHECK-SAME: %[[A1:.*]]: i64,
@@ -53,12 +55,12 @@
 //   CHECK-NOT: sparse_tensor.storage_specifier.set
 //   CHECK-NOT: sparse_tensor.storage_specifier.get
 //       CHECK: return %[[A1]]
-func.func @sparse_get_specifier_dce_fold(%arg0: !sparse_tensor.storage_specifier<#SparseVector>, %arg1: i64, %arg2: i64) -> i64 {
+func.func @sparse_get_specifier_dce_fold(%arg0: !sparse_tensor.storage_specifier<#SparseVectorStorage>, %arg1: i64, %arg2: i64) -> i64 {
   %0 = sparse_tensor.storage_specifier.set %arg0 dim_sz at 0 with %arg1
-       : i64, !sparse_tensor.storage_specifier<#SparseVector>
+       : i64, !sparse_tensor.storage_specifier<#SparseVectorStorage>
   %1 = sparse_tensor.storage_specifier.set %0 ptr_mem_sz at 0 with %arg2
-       : i64, !sparse_tensor.storage_specifier<#SparseVector>
+       : i64, !sparse_tensor.storage_specifier<#SparseVectorStorage>
   %2 = sparse_tensor.storage_specifier.get %1 dim_sz at 0
-       : !sparse_tensor.storage_specifier<#SparseVector> to i64
+       : !sparse_tensor.storage_specifier<#SparseVectorStorage> to i64
   return %2 : i64
 }
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -98,7 +98,7 @@
 
 // -----
 
-#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}>
+#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}>
 
 func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>) -> i64 {
   // expected-error@+1 {{redundant dimension argument for querying value memory size}}
@@ -109,7 +109,7 @@
 
 // -----
 
-#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}>
+#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}>
 
 func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>) -> i64 {
   // expected-error@+1 {{missing dimension argument}}
@@ -120,7 +120,7 @@
 
 // -----
 
-#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}>
+#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}>
 
 func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>) -> i64 {
   // expected-error@+1 {{requested dimension out of bound}}
@@ -131,7 +131,7 @@
 
 // -----
 
-#COO = #sparse_tensor.encoding<{dimLevelType = ["compressed-nu", "singleton"]}>
+#COO = #sparse_tensor.storage<{storageLevelType = ["compressed", "singleton"]}>
 
 func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#COO>) -> i64 {
   // expected-error@+1 {{requested pointer memory size on a singleton level}}
@@ -142,7 +142,7 @@
 
 // -----
 
-#COO = #sparse_tensor.encoding<{dimLevelType = ["compressed-nu", "singleton"]}>
+#COO = #sparse_tensor.storage<{storageLevelType = ["compressed", "singleton"]}>
"singleton"]}> func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#COO>) -> i64 { // expected-error@+1 {{type mismatch between requested }} @@ -153,7 +153,7 @@ // ----- -#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> +#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}> func.func @sparse_set_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>, %arg1: i32) diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -104,7 +104,7 @@ // ----- -#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> +#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}> // CHECK-LABEL: func @sparse_metadata_init( // CHECK: %[[T:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#{{.*}}> @@ -116,7 +116,7 @@ // ----- -#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> +#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}> // CHECK-LABEL: func @sparse_get_md( // CHECK-SAME: %[[A:.*]]: !sparse_tensor.storage_specifier<#{{.*}}> @@ -130,7 +130,7 @@ // ----- -#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> +#SparseVector = #sparse_tensor.storage<{storageLevelType = ["compressed"]}> // CHECK-LABEL: func @sparse_set_md( // CHECK-SAME: %[[A:.*]]: !sparse_tensor.storage_specifier<#{{.*}}>,