diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -150,6 +150,7 @@
   kEmptyCOO = 4,
   kToCOO = 5,
   kToIterator = 6,
+  kPack = 7,
 };
 
 /// This enum defines all the sparse representations supportable by
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -335,6 +335,18 @@
                       const uint64_t *lvl2dim,
                       SparseTensorEnumeratorBase<V> &lvlEnumerator);
 
+  /// Constructs a sparse tensor with the given encoding, and initializes
+  /// the contents from the level buffers. This ctor allocates exactly
+  /// the required amount of overhead storage, without using any heuristics.
+  /// It assumes that the data provided by `lvlBufs` can be used directly to
+  /// interpret the resulting sparse tensor and performs *NO* integrity test
+  /// on the input data. It also assumes that the trailing COO coordinate
+  /// buffer is passed in as a single AoS memory region.
+  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
+                      uint64_t lvlRank, const uint64_t *lvlSizes,
+                      const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
+                      const intptr_t *lvlBufs);
+
   /// Allocates a new empty sparse tensor. The preconditions/assertions
   /// are as per the `SparseTensorStorageBase` ctor; which is to say,
   /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes",
@@ -403,6 +415,19 @@
                      uint64_t srcRank, const uint64_t *src2lvl,
                      const SparseTensorStorageBase &source);
 
+  /// Allocates a new sparse tensor and initializes it directly from the
+  /// data stored in the level buffers.
+  ///
+  /// Preconditions:
+  /// * as per the `SparseTensorStorageBase` ctor.
+  /// * the integrity of the data stored in `buffers` is guaranteed by the caller.
+  static SparseTensorStorage<P, C, V> *
+  packFromLvlBuffers(uint64_t dimRank, const uint64_t *dimShape,
+                     uint64_t lvlRank, const uint64_t *lvlSizes,
+                     const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
+                     uint64_t srcRank, const uint64_t *src2lvl,
+                     const intptr_t *buffers);
+
   ~SparseTensorStorage() final = default;
 
   /// Partially specialize these getter methods based on template types.
@@ -626,7 +651,7 @@
   /// Finalizes the sparse position structure at this level.
   void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
     if (count == 0)
-      return; // Short-circuit, since it'll be a nop.
+      return;                       // Short-circuit, since it'll be a nop.
     const auto dlt = getLvlType(l); // Avoid redundant bounds checking.
     if (isCompressedDLT(dlt)) {
       appendPos(l, coordinates[l].size(), count);
@@ -995,6 +1020,18 @@
   return tensor;
 }
 
+template <typename P, typename C, typename V>
+SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::packFromLvlBuffers(
+    uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
+    const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
+    const uint64_t *lvl2dim, uint64_t srcRank, const uint64_t *src2lvl,
+    const intptr_t *buffers) {
+  assert(dimShape && "Got nullptr for dimension shape");
+  auto *tensor = new SparseTensorStorage<P, C, V>(
+      dimRank, dimShape, lvlRank, lvlSizes, lvlTypes, lvl2dim, buffers);
+  return tensor;
+}
+
 template <typename P, typename C, typename V>
 SparseTensorStorage<P, C, V>::SparseTensorStorage(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
@@ -1153,6 +1190,59 @@
   }
 }
 
+template <typename P, typename C, typename V>
+SparseTensorStorage<P, C, V>::SparseTensorStorage(
+    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
+    const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
+    const uint64_t *lvl2dim, const intptr_t *lvlBufs)
+    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
+                          lvl2dim) {
+  uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
+  for (uint64_t l = 0; l < lvlRank; l++) {
+    if (!isUniqueLvl(l) && isCompressedLvl(l)) {
+      // A `compressed-nu` level marks the start of the trailing COO levels.
+      // Since the coordinate buffer for the trailing COO levels is passed in
+      // using an AoS layout, while SparseTensorStorage uses an SoA layout, we
+      // cannot simply copy the values from the provided buffers.
+      trailCOOLen = lvlRank - l;
+      break;
+    }
+    assert(!isSingletonLvl(l) &&
+           "Singleton level not following a compressed-nu level");
+    if (isCompressedLvl(l)) {
+      P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
+      C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
+      // Copies the level buffers into the vectors. The buffers cannot simply
+      // be reused because the memory passed in by the user is not necessarily
+      // heap-allocated.
+      positions[l].assign(posPtr, posPtr + parentSz + 1);
+      coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
+    } else {
+      assert(isDenseLvl(l) && "Level is not dense");
+    }
+    parentSz = assembledSize(parentSz, l);
+  }
+
+  if (trailCOOLen != 0) {
+    uint64_t cooStartLvl = lvlRank - trailCOOLen;
+    assert(!isUniqueLvl(cooStartLvl) && isCompressedLvl(cooStartLvl));
+    P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
+    C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
+    positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
+    P crdLen = positions[cooStartLvl][parentSz];
+    for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
+      coordinates[l].resize(crdLen);
+      for (uint64_t n = 0; n < crdLen; n++) {
+        coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
+      }
+    }
+    parentSz = assembledSize(parentSz, cooStartLvl);
+  }
+
+  V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
+  values.assign(valPtr, valPtr + parentSz);
+}
+
 #undef ASSERT_DENSE_DLT
 
 } // namespace sparse_tensor
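For illustration only, and not part of the patch itself: the sketch below shows the `lvlBufs` layout that the new constructor above expects for a rank-2 sorted-COO tensor (`lvlTypes = [ "compressed-nu", "singleton" ]`), using the same three stored entries as the sparse_pack.mlir test further down. The `P = C = uint64_t`, `V = double` instantiation and the standalone `main` are assumptions made for this sketch.

// Sketch only: buffer layout consumed by the pack ctor, under the
// assumptions stated above.
#include <cstdint>
#include <vector>

int main() {
  // Positions of the compressed-nu level: three stored entries in total.
  std::vector<std::uint64_t> pos = {0, 3};
  // One AoS coordinate buffer shared by both trailing COO levels:
  // the pairs (1,2), (5,6), (7,8) stored back to back.
  std::vector<std::uint64_t> crd = {1, 2, 5, 6, 7, 8};
  // One value per stored entry.
  std::vector<double> val = {1.0, 2.0, 3.0};

  // The ctor receives the buffers as raw addresses, in level order, with the
  // value buffer last:
  //   lvlBufs[0] = positions, lvlBufs[1] = AoS coordinates, lvlBufs[2] = values.
  std::intptr_t lvlBufs[] = {reinterpret_cast<std::intptr_t>(pos.data()),
                             reinterpret_cast<std::intptr_t>(crd.data()),
                             reinterpret_cast<std::intptr_t>(val.data())};
  // A SparseTensorStorage<uint64_t, uint64_t, double> built from lvlBufs would
  // copy pos/crd into its per-level SoA vectors and val into its value
  // storage, performing no integrity checks on the data.
  (void)lvlBufs;
  return 0;
}

The key point is that both trailing COO levels share one position buffer and one AoS coordinate buffer; the constructor then splits the coordinates into its own SoA vectors.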
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -420,6 +420,10 @@
   return !rtp || rtp.getRank() == 0;
 }
 
+/// Generates code to cast a tensor to a memref.
+TypedValue<BaseMemRefType> genToMemref(OpBuilder &builder, Location loc,
+                                       Value tensor);
+
 /// Infers the result type and generates `ToPositionsOp`.
 Value genToPositions(OpBuilder &builder, Location loc, Value tensor,
                      Level lvl);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -661,6 +661,14 @@
   return builder.create(loc, resTp, valuesBuffer, lvlCoords);
 }
 
+TypedValue<BaseMemRefType>
+sparse_tensor::genToMemref(OpBuilder &builder, Location loc, Value tensor) {
+  auto tTp = llvm::cast<TensorType>(tensor.getType());
+  auto mTp = MemRefType::get(tTp.getShape(), tTp.getElementType());
+  return builder.create<bufferization::ToMemrefOp>(loc, mTp, tensor)
+      .getResult();
+}
+
 Value sparse_tensor::genToPositions(OpBuilder &builder, Location loc,
                                     Value tensor, Level lvl) {
   const auto srcTp = getSparseTensorType(tensor);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -539,15 +539,8 @@
   }
 }
 
-static TypedValue<BaseMemRefType> genToMemref(OpBuilder &builder, Location loc,
-                                              Value tensor) {
-  auto tTp = llvm::cast<TensorType>(tensor.getType());
-  auto mTp = MemRefType::get(tTp.getShape(), tTp.getElementType());
-  return builder.create<bufferization::ToMemrefOp>(loc, mTp, tensor)
-      .getResult();
-}
-
-Value genSliceToSize(OpBuilder &builder, Location loc, Value mem, Value sz) {
+static Value genSliceToSize(OpBuilder &builder, Location loc, Value mem,
+                            Value sz) {
   auto elemTp = llvm::cast<MemRefType>(mem.getType()).getElementType();
   return builder
       .create<memref::SubViewOp>(
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -131,8 +131,11 @@
 }
 
 /// Returns an array with the dimension-sizes of the given tensor.
+/// If the *tensor* parameter is null, the tensor type is assumed to have a
+/// static shape.
 static SmallVector<Value> getDimSizes(OpBuilder &builder, Location loc,
-                                      SparseTensorType stt, Value tensor) {
+                                      SparseTensorType stt,
+                                      Value tensor = Value()) {
   SmallVector<Value> out;
   fillDimSizes(builder, loc, stt, tensor, out);
   return out;
 }
@@ -210,6 +213,32 @@
   return allocaBuffer(builder, loc, lvlTypes);
 }
 
+/// Extracts the bare (aligned) pointer to the buffer underlying the tensor.
+static Value extractBarePtrFromTensor(OpBuilder &builder, Location loc,
+                                      Value tensor) {
+  auto buf = genToMemref(builder, loc, tensor);
+  return builder.create<memref::ExtractAlignedPointerAsIndexOp>(loc, buf);
+}
+
+/// Generates a temporary buffer of bare pointers to the level and value buffers.
+static Value genLvlPtrsBuffers(OpBuilder &builder, Location loc,
+                               ValueRange lvlTensors, Value valTensor) {
+  SmallVector<Value> lvlBarePtrs;
+  lvlBarePtrs.reserve(lvlTensors.size() + 1);
+  // Passing in lvl buffer pointers.
+  for (const auto lvl : lvlTensors)
+    lvlBarePtrs.push_back(extractBarePtrFromTensor(builder, loc, lvl));
+
+  // Passing in value buffer pointers.
+  lvlBarePtrs.push_back(extractBarePtrFromTensor(builder, loc, valTensor));
+  Value idxPtr = builder.create<memref::ExtractAlignedPointerAsIndexOp>(
+      loc, allocaBuffer(builder, loc, lvlBarePtrs));
+  Value idxCast =
+      builder.create<arith::IndexCastOp>(loc, builder.getI64Type(), idxPtr);
+  return builder.create<LLVM::IntToPtrOp>(loc, getOpaquePointerType(builder),
+                                          idxCast);
+}
+
 /// This class abstracts over the API of `_mlir_ciface_newSparseTensor`:
 /// the "swiss army knife" method of the sparse runtime support library
 /// for materializing sparse tensors into the computation. This abstraction
@@ -1282,7 +1311,7 @@
     const Dimension concatDim = op.getDimension();
     const Dimension dimRank = dstTp.getDimRank();
 
-    Value dst; // destination tensor
+    Value dst;         // destination tensor
     Value dstDimToLvl; // destination tensor permutation (if sparse out)
     // A pointer to the value being inserted (if dense => sparse)
     Value elemPtr;
@@ -1437,6 +1466,29 @@
   }
 };
 
+/// Sparse conversion rule for the sparse_tensor.pack operator.
+class SparseTensorPackConverter : public OpConversionPattern<PackOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(PackOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    const Location loc = op->getLoc();
+    const auto dstTp = getSparseTensorType(op.getResult());
+    // A PackOp always returns a statically shaped tensor result.
+    assert(dstTp.hasStaticDimShape());
+    SmallVector<Value> dimSizes = getDimSizes(rewriter, loc, dstTp);
+    Value dst =
+        NewCallParams(rewriter, loc)
+            .genBuffers(dstTp.withoutDimToLvl(), dimSizes)
+            .genNewCall(Action::kPack,
+                        genLvlPtrsBuffers(rewriter, loc, adaptor.getLevels(),
+                                          adaptor.getValues()));
+    rewriter.replaceOp(op, dst);
+    return success();
+  }
+};
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -1457,18 +1509,18 @@
 void mlir::populateSparseTensorConversionPatterns(
     TypeConverter &typeConverter, RewritePatternSet &patterns,
     const SparseTensorConversionOptions &options) {
-  patterns
-      .add<SparseReturnConverter, SparseTensorToDimSizeConverter,
-           SparseCastConverter, SparseTensorNewConverter,
-           SparseReshapeConverter<tensor::ExpandShapeOp>,
-           SparseReshapeConverter<tensor::CollapseShapeOp>,
-           SparseTensorConcatConverter, SparseTensorAllocConverter,
-           SparseTensorDeallocConverter, SparseTensorToPositionsConverter,
-           SparseTensorToCoordinatesConverter, SparseTensorToValuesConverter,
-           SparseNumberOfEntriesConverter, SparseTensorLoadConverter,
-           SparseTensorInsertConverter, SparseTensorExpandConverter,
-           SparseTensorCompressConverter, SparseTensorOutConverter>(
-          typeConverter, patterns.getContext());
+  patterns.add<SparseReturnConverter, SparseTensorToDimSizeConverter,
+               SparseCastConverter, SparseTensorNewConverter,
+               SparseReshapeConverter<tensor::ExpandShapeOp>,
+               SparseReshapeConverter<tensor::CollapseShapeOp>,
+               SparseTensorConcatConverter, SparseTensorAllocConverter,
+               SparseTensorDeallocConverter, SparseTensorToPositionsConverter,
+               SparseTensorToCoordinatesConverter,
+               SparseTensorToValuesConverter, SparseNumberOfEntriesConverter,
+               SparseTensorLoadConverter, SparseTensorInsertConverter,
+               SparseTensorExpandConverter, SparseTensorCompressConverter,
+               SparseTensorOutConverter, SparseTensorPackConverter>(
+      typeConverter, patterns.getContext());
   patterns.add<SparseTensorConvertConverter>(typeConverter,
                                              patterns.getContext(), options);
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -313,6 +313,13 @@
       auto *coo = tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl);          \
       return new SparseTensorIterator<V>(coo);                                \
     }                                                                         \
+    case Action::kPack: {                                                     \
+      assert(ptr && "Received nullptr for SparseTensorStorage object");       \
+      intptr_t *buffers = static_cast<intptr_t *>(ptr);                       \
+      return SparseTensorStorage<P, C, V>::packFromLvlBuffers(                \
+          dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, lvl2dim, dimRank,   \
+          dim2lvl, buffers);                                                  \
+    }                                                                         \
     }                                                                         \
     MLIR_SPARSETENSOR_FATAL("unknown action: %d\n",                           \
                             static_cast<uint32_t>(action));                   \
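For illustration only, and not part of the patch: the conversion above hands the runtime a single opaque pointer to a stack-allocated array of bare buffer addresses, and the `Action::kPack` case casts that pointer back to `intptr_t *` before forwarding to `packFromLvlBuffers`. A minimal standalone sketch of that round trip follows; the `marshalBuffers`/`unmarshalBuffers` helpers are names invented for the sketch, not functions from the patch.

// Sketch only: the opaque-pointer handshake between generated code and the
// runtime's Action::kPack case, under the assumptions stated above.
#include <cassert>
#include <cstdint>
#include <vector>

// Caller side (what genLvlPtrsBuffers models in IR): an array holding the bare
// address of each level buffer, followed by the value buffer, passed opaquely.
static void *marshalBuffers(std::vector<std::intptr_t> &barePtrs) {
  return static_cast<void *>(barePtrs.data());
}

// Runtime side: recover the intptr_t array from the opaque pointer, as the
// kPack case does before calling packFromLvlBuffers.
static std::intptr_t *unmarshalBuffers(void *ptr) {
  assert(ptr && "Received nullptr for level buffers");
  return static_cast<std::intptr_t *>(ptr);
}

int main() {
  std::vector<std::uint64_t> pos = {0, 3};
  std::vector<double> val = {1.0, 2.0, 3.0};
  std::vector<std::intptr_t> barePtrs = {
      reinterpret_cast<std::intptr_t>(pos.data()),
      reinterpret_cast<std::intptr_t>(val.data())};
  std::intptr_t *buffers = unmarshalBuffers(marshalBuffers(barePtrs));
  return buffers == barePtrs.data() ? 0 : 1;
}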
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
@@ -24,7 +24,7 @@
 // REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false vl=4
 // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
 
-// TODO: Pack only support CodeGen Path
+// TODO: support sparse_tensor.unpack on the libgen path.
 
 #SortedCOO = #sparse_tensor.encoding<{
   lvlTypes = [ "compressed-nu", "singleton" ]
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir
copy from mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
copy to mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir
@@ -17,14 +17,14 @@
 // DEFINE: %{env} =
 //--------------------------------------------------------------------------------------------------
 
-// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
 // RUN: %{compile} | %{run} | FileCheck %s
 //
 // Do the same run, but now with VLA vectorization.
 // REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false vl=4
 // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
 
-// TODO: Pack only support CodeGen Path
+// TODO: This is a short-lived test that should be merged with sparse_pack.mlir
+// once sparse_tensor.unpack is supported on the libgen path.
 
 #SortedCOO = #sparse_tensor.encoding<{
   lvlTypes = [ "compressed-nu", "singleton" ]
@@ -42,9 +42,10 @@
   crdWidth = 32
 }>
 
-#BCOO = #sparse_tensor.encoding<{
-  lvlTypes = [ "dense", "compressed-hi-nu", "singleton" ]
-}>
+// TODO: "compressed-hi" is not supported by the libgen path.
+// #BCOO = #sparse_tensor.encoding<{
+//   lvlTypes = [ "dense", "compressed-hi-nu", "singleton" ]
+// }>
 
 module {
   //
@@ -100,25 +101,6 @@
     %csr= sparse_tensor.pack %csr_data, %csr_pos32, %csr_index32 :
        tensor<4xf64>, tensor<3xi32>, tensor<3xi32> to tensor<2x2xf64, #CSR>
 
-    %bdata = arith.constant dense<
-       [ 1.0, 2.0, 3.0, 4.0, 5.0, 0.0]
-    > : tensor<6xf64>
-
-    %bpos = arith.constant dense<
-       [0, 3, 3, 5]
-    > : tensor<4xindex>
-
-    %bindex = arith.constant dense<
-       [[ 1,  2],
-        [ 5,  6],
-        [ 7,  8],
-        [ 2,  3],
-        [ 4,  2],
-        [ 10, 10]]
-    > : tensor<6x2xindex>
-    %bs = sparse_tensor.pack %bdata, %bpos, %bindex :
-       tensor<6xf64>, tensor<4xindex>, tensor<6x2xindex> to tensor<2x10x10xf64, #BCOO>
-
     // CHECK:1
     // CHECK-NEXT:2
     // CHECK-NEXT:1
@@ -173,65 +155,10 @@
       vector.print %v: f64
     }
 
-    %d_csr = tensor.empty() : tensor<4xf64>
-    %p_csr = tensor.empty() : tensor<3xi32>
-    %i_csr = tensor.empty() : tensor<3xi32>
-    %rd_csr, %rp_csr, %ri_csr, %ld_csr, %lp_csr, %li_csr = sparse_tensor.unpack %csr : tensor<2x2xf64, #CSR>
-                 outs(%d_csr, %p_csr, %i_csr : tensor<4xf64>, tensor<3xi32>, tensor<3xi32>)
-                 -> tensor<4xf64>, (tensor<3xi32>, tensor<3xi32>), index, (index, index)
-
-    // CHECK-NEXT: ( 1, 2, 3, {{.*}} )
-    %vd_csr = vector.transfer_read %rd_csr[%c0], %f0 : tensor<4xf64>, vector<4xf64>
-    vector.print %vd_csr : vector<4xf64>
-
-    // CHECK-NEXT:1
-    // CHECK-NEXT:2
-    // CHECK-NEXT:3
-    //
-    // CHECK-NEXT:4
-    // CHECK-NEXT:5
-    //
-    // Make sure the trailing zeros are not traversed.
-    // CHECK-NOT: 0
-    sparse_tensor.foreach in %bs : tensor<2x10x10xf64, #BCOO> do {
-      ^bb0(%0: index, %1: index, %2: index, %v: f64) :
-        vector.print %v: f64
-    }
-
-    %od = tensor.empty() : tensor<3xf64>
-    %op = tensor.empty() : tensor<2xi32>
-    %oi = tensor.empty() : tensor<3x2xi32>
-    %d, %p, %i, %dl, %pl, %il = sparse_tensor.unpack %s5 : tensor<10x10xf64, #SortedCOOI32>
-                 outs(%od, %op, %oi : tensor<3xf64>, tensor<2xi32>, tensor<3x2xi32>)
-                 -> tensor<3xf64>, (tensor<2xi32>, tensor<3x2xi32>), index, (index, index)
-
-    // CHECK-NEXT: ( 1, 2, 3 )
-    %vd = vector.transfer_read %d[%c0], %f0 : tensor<3xf64>, vector<3xf64>
-    vector.print %vd : vector<3xf64>
-
-    // CHECK-NEXT: ( ( 1, 2 ), ( 5, 6 ), ( 7, 8 ) )
-    %vi = vector.transfer_read %i[%c0, %c0], %i0 : tensor<3x2xi32>, vector<3x2xi32>
-    vector.print %vi : vector<3x2xi32>
-
-
-    %bod = tensor.empty() : tensor<6xf64>
-    %bop = tensor.empty() : tensor<4xindex>
-    %boi = tensor.empty() : tensor<6x2xindex>
-    %bd, %bp, %bi, %ld, %lp, %li = sparse_tensor.unpack %bs : tensor<2x10x10xf64, #BCOO>
-                 outs(%bod, %bop, %boi : tensor<6xf64>, tensor<4xindex>, tensor<6x2xindex>)
-                 -> tensor<6xf64>, (tensor<4xindex>, tensor<6x2xindex>), index, (index, index)
-
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, {{.*}} )
-    %vbd = vector.transfer_read %bd[%c0], %f0 : tensor<6xf64>, vector<6xf64>
-    vector.print %vbd : vector<6xf64>
-    // CHECK-NEXT: 5
-    vector.print %ld : index
-
-    // CHECK-NEXT: ( ( 1, 2 ), ( 5, 6 ), ( 7, 8 ), ( 2, 3 ), ( 4, 2 ), ( {{.*}}, {{.*}} ) )
-    %vbi = vector.transfer_read %bi[%c0, %c0], %c0 : tensor<6x2xindex>, vector<6x2xindex>
-    vector.print %vbi : vector<6x2xindex>
-    // CHECK-NEXT: 10
-    vector.print %li : index
+    bufferization.dealloc_tensor %s4  : tensor<10x10xf64, #SortedCOO>
+    bufferization.dealloc_tensor %s5  : tensor<10x10xf64, #SortedCOOI32>
+    bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
 
     return
   }