diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -592,15 +592,21 @@
     if (!encDst && encSrc) {
       // This is sparse => dense conversion, which is handled as follows:
       //   dst = new Tensor(0);
-      //   iter = src->toCOO()->getIterator();
+      //   iter = src->toCOO();
+      //   iter->startIterator();
       //   while (elem = iter->getNext()) {
       //     dst[elem.indices] = elem.value;
       //   }
-      Location loc = op->getLoc();
-      RankedTensorType tensorTp = resType.dyn_cast<RankedTensorType>();
-      if (!tensorTp)
-        return failure();
-      unsigned rank = tensorTp.getRank();
+      //
+      // ConvertOp::verify() ensures that these casts will succeed,
+      // and that dstTensorTp subsumes srcTensorTp.
+      RankedTensorType dstTensorTp = resType.cast<RankedTensorType>();
+      RankedTensorType srcTensorTp = srcType.cast<RankedTensorType>();
+      unsigned rank = dstTensorTp.getRank();
+      Type elemTp = dstTensorTp.getElementType();
+      // Fabricate a no-permutation encoding for newParams().
+      // The pointer/index types must be those of `src`.
+      // The dimLevelTypes aren't actually used by kToIter.
       encDst = SparseTensorEncodingAttr::get(
           op->getContext(),
           SmallVector<SparseTensorEncodingAttr::DimLevelType>(
@@ -608,12 +614,12 @@
           AffineMap(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth());
       SmallVector<Value, 4> sizes;
       SmallVector<Value, 8> params;
-      sizesFromPtr(rewriter, sizes, op, encSrc, tensorTp, src);
+      sizesFromPtr(rewriter, sizes, op, encSrc, srcTensorTp, src);
       newParams(rewriter, params, op, encDst, kToIter, sizes, src);
       Value iter = genNewCall(rewriter, op, params);
       Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
-      Value elemPtr = genAllocaScalar(rewriter, loc, tensorTp.getElementType());
-      Value dst = allocDenseTensor(rewriter, loc, tensorTp, sizes);
+      Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+      Value dst = allocDenseTensor(rewriter, loc, dstTensorTp, sizes);
       SmallVector<Value> noArgs;
       SmallVector<Type> noTypes;
       auto whileOp = rewriter.create<scf::WhileOp>(loc, noTypes, noArgs);
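For readers tracing the lowering, the sketch below shows roughly the IR this rewrite emits for a sparse => dense conversion of a 2x3x4 tensor. It is a hand-written approximation, not verbatim output of the pass: the runtime entry-point names (@newSparseTensor, @getNextF64) and their operand lists are simplified assumptions for illustration.

    // dst = new Tensor(0): a zero-initialized dense buffer (allocDenseTensor).
    %dst = memref.alloc() : memref<2x3x4xf64>
    // iter = src->toCOO(); iter->startIterator(): a single runtime call built
    // from the newParams()/genNewCall() parameters, using action kToIter.
    %iter = call @newSparseTensor(...) : (...) -> !llvm.ptr<i8>
    // Scratch space for one index tuple (one slot per dimension) and one
    // element value (genAlloca / genAllocaScalar above).
    %c3 = arith.constant 3 : index
    %ind = memref.alloca(%c3) : memref<?xindex>
    %elemPtr = memref.alloca() : memref<f64>
    // while (elem = iter->getNext()) { dst[elem.indices] = elem.value; }
    scf.while : () -> () {
      %ok = call @getNextF64(%iter, %ind, %elemPtr)
          : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
      scf.condition(%ok)
    } do {
      // Load the indices and value, then store into %dst (details elided).
      scf.yield
    }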
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
@@ -48,15 +48,70 @@
 module {
   //
-  // Output utilities.
+  // Utilities for output and releasing memory.
   //
-  func @dumpf64(%arg0: tensor<2x3x4xf64>) {
+  func @dump(%arg0: tensor<2x3x4xf64>) {
     %c0 = arith.constant 0 : index
     %d0 = arith.constant -1.0 : f64
     %0 = vector.transfer_read %arg0[%c0, %c0, %c0], %d0: tensor<2x3x4xf64>, vector<2x3x4xf64>
     vector.print %0 : vector<2x3x4xf64>
     return
   }
+  func @dumpAndRelease_234(%arg0: tensor<2x3x4xf64>) {
+    call @dump(%arg0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<2x3x4xf64>
+    memref.dealloc %1 : memref<2x3x4xf64>
+    return
+  }
+  func @dumpAndRelease_p34(%arg0: tensor<?x3x4xf64>) {
+    %0 = tensor.cast %arg0 : tensor<?x3x4xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<?x3x4xf64>
+    memref.dealloc %1 : memref<?x3x4xf64>
+    return
+  }
+  func @dumpAndRelease_2p4(%arg0: tensor<2x?x4xf64>) {
+    %0 = tensor.cast %arg0 : tensor<2x?x4xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<2x?x4xf64>
+    memref.dealloc %1 : memref<2x?x4xf64>
+    return
+  }
+  func @dumpAndRelease_23p(%arg0: tensor<2x3x?xf64>) {
+    %0 = tensor.cast %arg0 : tensor<2x3x?xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<2x3x?xf64>
+    memref.dealloc %1 : memref<2x3x?xf64>
+    return
+  }
+  func @dumpAndRelease_2pp(%arg0: tensor<2x?x?xf64>) {
+    %0 = tensor.cast %arg0 : tensor<2x?x?xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<2x?x?xf64>
+    memref.dealloc %1 : memref<2x?x?xf64>
+    return
+  }
+  func @dumpAndRelease_p3p(%arg0: tensor<?x3x?xf64>) {
+    %0 = tensor.cast %arg0 : tensor<?x3x?xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<?x3x?xf64>
+    memref.dealloc %1 : memref<?x3x?xf64>
+    return
+  }
+  func @dumpAndRelease_pp4(%arg0: tensor<?x?x4xf64>) {
+    %0 = tensor.cast %arg0 : tensor<?x?x4xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<?x?x4xf64>
+    memref.dealloc %1 : memref<?x?x4xf64>
+    return
+  }
+  func @dumpAndRelease_ppp(%arg0: tensor<?x?x?xf64>) {
+    %0 = tensor.cast %arg0 : tensor<?x?x?xf64> to tensor<2x3x4xf64>
+    call @dump(%0) : (tensor<2x3x4xf64>) -> ()
+    %1 = memref.buffer_cast %arg0 : memref<?x?x?xf64>
+    memref.dealloc %1 : memref<?x?x?xf64>
+    return
+  }
   //
   // Main driver.
@@ -65,7 +120,7 @@
     //
    // Initialize a 3-dim dense tensor.
    //
-    %t = arith.constant dense<[
+    %src = arith.constant dense<[
        [ [ 1.0, 2.0, 3.0, 4.0 ],
          [ 5.0, 6.0, 7.0, 8.0 ],
          [ 9.0, 10.0, 11.0, 12.0 ] ],
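A reading aid for the new test code: the suffixes and SSA names encode shapes, with a digit for a static dimension and p for a dynamic (?) one, plus a trailing digit on the sparse values naming the encoding, so %sp344 below holds a tensor<?x3x4xf64, #Tensor4>. The dumpAndRelease_* helpers above funnel every dense result through the single static-shaped @dump by refining dynamic dimensions with tensor.cast, which is only valid because each dynamic extent actually equals its static counterpart at runtime. A minimal sketch of that pattern, assuming the leading dimension is in fact 2:

    %st = tensor.cast %dyn : tensor<?x3x4xf64> to tensor<2x3x4xf64>
    call @dump(%st) : (tensor<2x3x4xf64>) -> ()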
@@ -77,64 +132,113 @@
    //
    // Convert dense tensor directly to various sparse tensors.
    //
-    %1 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor1>
-    %2 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor2>
-    %3 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor3>
-    %4 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor4>
-    %5 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor5>
-    %6 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor6>
+    %s2341 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor1>
+    %s2342 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor2>
+    %s2343 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor3>
+    %s2344 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor4>
+    %s2345 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor5>
+    %s2346 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor6>
+
+    %sp344 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<?x3x4xf64, #Tensor4>
+    %sp345 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<?x3x4xf64, #Tensor5>
+    %sp346 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<?x3x4xf64, #Tensor6>
+    %s2p44 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x?x4xf64, #Tensor4>
+    %s2p45 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x?x4xf64, #Tensor5>
+    %s2p46 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x?x4xf64, #Tensor6>
+    %s23p4 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x?xf64, #Tensor4>
+    %s23p5 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x?xf64, #Tensor5>
+    %s23p6 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x3x?xf64, #Tensor6>
+    %s2pp4 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x?x?xf64, #Tensor4>
+    %s2pp5 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x?x?xf64, #Tensor5>
+    %s2pp6 = sparse_tensor.convert %src : tensor<2x3x4xf64> to tensor<2x?x?xf64, #Tensor6>
    //
    // Convert sparse tensor back to dense.
    //
-    %a = sparse_tensor.convert %1 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64>
-    %b = sparse_tensor.convert %2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64>
-    %c = sparse_tensor.convert %3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64>
-    %d = sparse_tensor.convert %4 : tensor<2x3x4xf64, #Tensor4> to tensor<2x3x4xf64>
-    %e = sparse_tensor.convert %5 : tensor<2x3x4xf64, #Tensor5> to tensor<2x3x4xf64>
-    %f = sparse_tensor.convert %6 : tensor<2x3x4xf64, #Tensor6> to tensor<2x3x4xf64>
+    %d2341 = sparse_tensor.convert %s2341 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64>
+    %d2342 = sparse_tensor.convert %s2342 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64>
+    %d2343 = sparse_tensor.convert %s2343 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64>
+    %d2344 = sparse_tensor.convert %s2344 : tensor<2x3x4xf64, #Tensor4> to tensor<2x3x4xf64>
+    %d2345 = sparse_tensor.convert %s2345 : tensor<2x3x4xf64, #Tensor5> to tensor<2x3x4xf64>
+    %d2346 = sparse_tensor.convert %s2346 : tensor<2x3x4xf64, #Tensor6> to tensor<2x3x4xf64>
+
+    %dp344 = sparse_tensor.convert %sp344 : tensor<?x3x4xf64, #Tensor4> to tensor<?x3x4xf64>
+    %dp345 = sparse_tensor.convert %sp345 : tensor<?x3x4xf64, #Tensor5> to tensor<?x3x4xf64>
+    %dp346 = sparse_tensor.convert %sp346 : tensor<?x3x4xf64, #Tensor6> to tensor<?x3x4xf64>
+    %d2p44 = sparse_tensor.convert %s2p44 : tensor<2x?x4xf64, #Tensor4> to tensor<2x?x4xf64>
+    %d2p45 = sparse_tensor.convert %s2p45 : tensor<2x?x4xf64, #Tensor5> to tensor<2x?x4xf64>
+    %d2p46 = sparse_tensor.convert %s2p46 : tensor<2x?x4xf64, #Tensor6> to tensor<2x?x4xf64>
+    %d23p4 = sparse_tensor.convert %s23p4 : tensor<2x3x?xf64, #Tensor4> to tensor<2x3x?xf64>
+    %d23p5 = sparse_tensor.convert %s23p5 : tensor<2x3x?xf64, #Tensor5> to tensor<2x3x?xf64>
+    %d23p6 = sparse_tensor.convert %s23p6 : tensor<2x3x?xf64, #Tensor6> to tensor<2x3x?xf64>
+    %d2pp4 = sparse_tensor.convert %s2pp4 : tensor<2x?x?xf64, #Tensor4> to tensor<2x?x?xf64>
+    %d2pp5 = sparse_tensor.convert %s2pp5 : tensor<2x?x?xf64, #Tensor5> to tensor<2x?x?xf64>
+    %d2pp6 = sparse_tensor.convert %s2pp6 : tensor<2x?x?xf64, #Tensor6> to tensor<2x?x?xf64>
+
+    %dp3p4 = sparse_tensor.convert %sp344 : tensor<?x3x4xf64, #Tensor4> to tensor<?x3x?xf64>
+    %dp3p5 = sparse_tensor.convert %sp345 : tensor<?x3x4xf64, #Tensor5> to tensor<?x3x?xf64>
+    %dp3p6 = sparse_tensor.convert %sp346 : tensor<?x3x4xf64, #Tensor6> to tensor<?x3x?xf64>
+    %dpp44 = sparse_tensor.convert %s2p44 : tensor<2x?x4xf64, #Tensor4> to tensor<?x?x4xf64>
+    %dpp45 = sparse_tensor.convert %s2p45 : tensor<2x?x4xf64, #Tensor5> to tensor<?x?x4xf64>
+    %dpp46 = sparse_tensor.convert %s2p46 : tensor<2x?x4xf64, #Tensor6> to tensor<?x?x4xf64>
+    %dppp4 = sparse_tensor.convert %s2pp4 : tensor<2x?x?xf64, #Tensor4> to tensor<?x?x?xf64>
+    %dppp5 = sparse_tensor.convert %s2pp5 : tensor<2x?x?xf64, #Tensor5> to tensor<?x?x?xf64>
+    %dppp6 = sparse_tensor.convert %s2pp6 : tensor<2x?x?xf64, #Tensor6> to tensor<?x?x?xf64>
    //
-    // Check round-trip equality.
+    // Check round-trip equality. And release dense tensors.
    //
-    // CHECK: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
-    call @dumpf64(%t) : (tensor<2x3x4xf64>) -> ()
-    call @dumpf64(%a) : (tensor<2x3x4xf64>) -> ()
-    call @dumpf64(%b) : (tensor<2x3x4xf64>) -> ()
-    call @dumpf64(%c) : (tensor<2x3x4xf64>) -> ()
-    call @dumpf64(%d) : (tensor<2x3x4xf64>) -> ()
-    call @dumpf64(%e) : (tensor<2x3x4xf64>) -> ()
-    call @dumpf64(%f) : (tensor<2x3x4xf64>) -> ()
+    // CHECK-COUNT-28: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+    call @dump(%src) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d2341) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d2342) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d2343) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d2344) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d2345) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_234(%d2346) : (tensor<2x3x4xf64>) -> ()
+    call @dumpAndRelease_p34(%dp344) : (tensor<?x3x4xf64>) -> ()
+    call @dumpAndRelease_p34(%dp345) : (tensor<?x3x4xf64>) -> ()
+    call @dumpAndRelease_p34(%dp346) : (tensor<?x3x4xf64>) -> ()
+    call @dumpAndRelease_2p4(%d2p44) : (tensor<2x?x4xf64>) -> ()
+    call @dumpAndRelease_2p4(%d2p45) : (tensor<2x?x4xf64>) -> ()
+    call @dumpAndRelease_2p4(%d2p46) : (tensor<2x?x4xf64>) -> ()
+    call @dumpAndRelease_23p(%d23p4) : (tensor<2x3x?xf64>) -> ()
+    call @dumpAndRelease_23p(%d23p5) : (tensor<2x3x?xf64>) -> ()
+    call @dumpAndRelease_23p(%d23p6) : (tensor<2x3x?xf64>) -> ()
+    call @dumpAndRelease_2pp(%d2pp4) : (tensor<2x?x?xf64>) -> ()
+    call @dumpAndRelease_2pp(%d2pp5) : (tensor<2x?x?xf64>) -> ()
+    call @dumpAndRelease_2pp(%d2pp6) : (tensor<2x?x?xf64>) -> ()
+    call @dumpAndRelease_p3p(%dp3p4) : (tensor<?x3x?xf64>) -> ()
+    call @dumpAndRelease_p3p(%dp3p5) : (tensor<?x3x?xf64>) -> ()
+    call @dumpAndRelease_p3p(%dp3p6) : (tensor<?x3x?xf64>) -> ()
+    call @dumpAndRelease_pp4(%dpp44) : (tensor<?x?x4xf64>) -> ()
+    call @dumpAndRelease_pp4(%dpp45) : (tensor<?x?x4xf64>) -> ()
+    call @dumpAndRelease_pp4(%dpp46) : (tensor<?x?x4xf64>) -> ()
+    call @dumpAndRelease_ppp(%dppp4) : (tensor<?x?x?xf64>) -> ()
+    call @dumpAndRelease_ppp(%dppp5) : (tensor<?x?x?xf64>) -> ()
+    call @dumpAndRelease_ppp(%dppp6) : (tensor<?x?x?xf64>) -> ()
    //
-    // Release the resources.
+    // Release sparse tensors.
    //
-    sparse_tensor.release %1 : tensor<2x3x4xf64, #Tensor1>
-    sparse_tensor.release %2 : tensor<2x3x4xf64, #Tensor2>
-    sparse_tensor.release %3 : tensor<2x3x4xf64, #Tensor3>
-    sparse_tensor.release %4 : tensor<2x3x4xf64, #Tensor4>
-    sparse_tensor.release %5 : tensor<2x3x4xf64, #Tensor5>
-    sparse_tensor.release %6 : tensor<2x3x4xf64, #Tensor6>
-
-    %ma = memref.buffer_cast %a : memref<2x3x4xf64>
-    %mb = memref.buffer_cast %b : memref<2x3x4xf64>
-    %mc = memref.buffer_cast %c : memref<2x3x4xf64>
-    %md = memref.buffer_cast %d : memref<2x3x4xf64>
-    %me = memref.buffer_cast %e : memref<2x3x4xf64>
-    %mf = memref.buffer_cast %f : memref<2x3x4xf64>
-
-    memref.dealloc %ma : memref<2x3x4xf64>
-    memref.dealloc %mb : memref<2x3x4xf64>
-    memref.dealloc %mc : memref<2x3x4xf64>
-    memref.dealloc %md : memref<2x3x4xf64>
-    memref.dealloc %me : memref<2x3x4xf64>
-    memref.dealloc %mf : memref<2x3x4xf64>
+    sparse_tensor.release %s2341 : tensor<2x3x4xf64, #Tensor1>
+    sparse_tensor.release %s2342 : tensor<2x3x4xf64, #Tensor2>
+    sparse_tensor.release %s2343 : tensor<2x3x4xf64, #Tensor3>
+    sparse_tensor.release %s2344 : tensor<2x3x4xf64, #Tensor4>
+    sparse_tensor.release %s2345 : tensor<2x3x4xf64, #Tensor5>
+    sparse_tensor.release %s2346 : tensor<2x3x4xf64, #Tensor6>
+    sparse_tensor.release %sp344 : tensor<?x3x4xf64, #Tensor4>
+    sparse_tensor.release %sp345 : tensor<?x3x4xf64, #Tensor5>
+    sparse_tensor.release %sp346 : tensor<?x3x4xf64, #Tensor6>
+    sparse_tensor.release %s2p44 : tensor<2x?x4xf64, #Tensor4>
+    sparse_tensor.release %s2p45 : tensor<2x?x4xf64, #Tensor5>
+    sparse_tensor.release %s2p46 : tensor<2x?x4xf64, #Tensor6>
+    sparse_tensor.release %s23p4 : tensor<2x3x?xf64, #Tensor4>
+    sparse_tensor.release %s23p5 : tensor<2x3x?xf64, #Tensor5>
+    sparse_tensor.release %s23p6 : tensor<2x3x?xf64, #Tensor6>
+    sparse_tensor.release %s2pp4 : tensor<2x?x?xf64, #Tensor4>
+    sparse_tensor.release %s2pp5 : tensor<2x?x?xf64, #Tensor5>
+    sparse_tensor.release %s2pp6 : tensor<2x?x?xf64, #Tensor6>
     return
   }
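A closing note on the mixed static/dynamic conversions this test adds: per the ConvertOp::verify() comment in the C++ change, the destination type only has to subsume the source type, so each destination dimension either matches the source dimension or is dynamic; the true extent of a relaxed dimension is recovered at runtime through sizesFromPtr(). A sketch of what this rule accepts and rejects, assuming the verifier enforces exactly this subsumption check:

    // Accepted: every destination dimension matches or relaxes to '?'.
    %ok = sparse_tensor.convert %s2p44 : tensor<2x?x4xf64, #Tensor4> to tensor<?x?x4xf64>
    // Rejected: '3' does not subsume '?'; convert cannot refine a dynamic
    // source dimension to a static destination dimension.
    // %bad = sparse_tensor.convert %s2p44 : tensor<2x?x4xf64, #Tensor4> to tensor<2x3x4xf64>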