diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -283,11 +283,23 @@ Type resType = op.getType(); auto encDst = getSparseTensorEncoding(resType); auto encSrc = getSparseTensorEncoding(op.source().getType()); - // TODO: implement sparse => sparse - // and sparse => dense - if (!encDst || encSrc) + if (encDst && encSrc) { + // This is a sparse => sparse conversion, which is handled as follows: + // t = src->asCOO(); ; src to COO in dst order + // dst = newSparseTensor(t) + // Using the coordinate scheme as an intermediate does not always + // yield the fastest conversion but avoids the need for a full + // O(N^2) conversion matrix. + Value perm; + Value coo = genNewCall(rewriter, op, encDst, 3, perm, operands[0]); + rewriter.replaceOp(op, genNewCall(rewriter, op, encDst, 1, perm, coo)); + return success(); + } + if (!encDst || encSrc) { + // TODO: sparse => dense return failure(); - // This is a dense => sparse conversion, that is handled as follows: + } + // This is a dense => sparse conversion, which is handled as follows: // t = newSparseCOO() // for i1 in dim1 // .. diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp --- a/mlir/lib/ExecutionEngine/SparseUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp @@ -94,14 +94,17 @@ /// Getter for elements array. const std::vector> &getElements() const { return elements; } - /// Factory method. + /// Factory method. Permutes the original dimensions according to + /// the given ordering and expects subsequent add() calls to honor + /// that same ordering for the given indices. The result is a + /// fully permuted coordinate scheme. static SparseTensor *newSparseTensor(uint64_t size, uint64_t *sizes, uint64_t *perm, uint64_t capacity = 0) { - std::vector indices(size); + std::vector permsz(size); for (uint64_t r = 0; r < size; r++) - indices[perm[r]] = sizes[r]; - return new SparseTensor(indices, capacity); + permsz[perm[r]] = sizes[r]; + return new SparseTensor(permsz, capacity); } private: @@ -168,8 +171,13 @@ /// Constructs a sparse tensor storage scheme from the given sparse /// tensor in coordinate scheme following the given per-rank dimension /// dense/sparse annotations. - SparseTensorStorage(SparseTensor *tensor, uint8_t *sparsity) - : sizes(tensor->getSizes()), pointers(getRank()), indices(getRank()) { + SparseTensorStorage(SparseTensor *tensor, uint8_t *sparsity, + uint64_t *perm) + : sizes(tensor->getSizes()), rev(getRank()), pointers(getRank()), + indices(getRank()) { + // Store "reverse" permutation. + for (uint64_t d = 0, rank = getRank(); d < rank; d++) + rev[perm[d]] = d; // Provide hints on capacity. // TODO: needs fine-tuning based on sparsity uint64_t nnz = tensor->getElements().size(); @@ -184,8 +192,12 @@ assert(sparsity[d] == kDense && "singleton not yet supported"); } } + // Prepare sparse pointer structures for all dimensions. + for (uint64_t d = 0, rank = getRank(); d < rank; d++) + if (sparsity[d] == kCompressed) + pointers[d].push_back(0); // Then setup the tensor. - traverse(tensor, sparsity, 0, nnz, 0); + fromCOO(tensor, sparsity, 0, nnz, 0); } virtual ~SparseTensorStorage() {} @@ -203,11 +215,31 @@ } void getValues(std::vector **out) override { *out = &values; } - /// Factory method. - static SparseTensorStorage *newSparseTensor(SparseTensor *t, - uint8_t *s) { + /// Returns this sparse tensor storage scheme as a new memory-resident + /// sparse tensor in coordinate scheme with the given dimension order. + SparseTensor *asCOO(uint64_t *perm) { + // Restore original order and allocate oordinate scheme with new order. + uint64_t size = getRank(); + std::vector tmp(size); + for (uint64_t r = 0; r < size; r++) + tmp[rev[r]] = sizes[r]; + SparseTensor *tensor = + SparseTensor::newSparseTensor(size, tmp.data(), perm, values.size()); + // Populate coordinate scheme from old order with new order. + for (uint64_t r = 0; r < size; r++) + tmp[r] = perm[rev[r]]; + std::vector idx(size); + toCOO(tensor, tmp.data(), idx, 0, 0); + return tensor; + } + + /// Factory method. Expects a coordinate scheme that respects the same + /// permutation as is desired for the new sparse storage scheme. + static SparseTensorStorage * + newSparseTensor(SparseTensor *t, uint8_t *sparsity, uint64_t *perm) { t->sort(); // sort lexicographically - SparseTensorStorage *n = new SparseTensorStorage(t, s); + SparseTensorStorage *n = + new SparseTensorStorage(t, sparsity, perm); delete t; return n; } @@ -216,17 +248,14 @@ /// Initializes sparse tensor storage scheme from a memory-resident sparse /// tensor in coordinate scheme. This method prepares the pointers and indices /// arrays under the given per-rank dimension dense/sparse annotations. - void traverse(SparseTensor *tensor, uint8_t *sparsity, uint64_t lo, - uint64_t hi, uint64_t d) { + void fromCOO(SparseTensor *tensor, uint8_t *sparsity, uint64_t lo, + uint64_t hi, uint64_t d) { const std::vector> &elements = tensor->getElements(); // Once dimensions are exhausted, insert the numerical values. if (d == getRank()) { values.push_back(lo < hi ? elements[lo].value : 0); return; } - // Prepare a sparse pointer structure at this dimension. - if (sparsity[d] == kCompressed && pointers[d].empty()) - pointers[d].push_back(0); // Visit all elements in this interval. uint64_t full = 0; while (lo < hi) { @@ -240,10 +269,10 @@ indices[d].push_back(idx); } else { for (; full < idx; full++) - traverse(tensor, sparsity, 0, 0, d + 1); // pass empty + fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty full++; } - traverse(tensor, sparsity, lo, seg, d + 1); + fromCOO(tensor, sparsity, lo, seg, d + 1); // And move on to next segment in interval. lo = seg; } @@ -252,12 +281,34 @@ pointers[d].push_back(indices[d].size()); } else { for (uint64_t sz = tensor->getSizes()[d]; full < sz; full++) - traverse(tensor, sparsity, 0, 0, d + 1); // pass empty + fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty + } + } + + /// Stores the sparse tensor storage scheme into a memory-resident sparse + /// tensor in coordinate scheme. + void toCOO(SparseTensor *tensor, uint64_t *perm, + std::vector &idx, uint64_t pos, uint64_t d) { + if (d == getRank()) { + tensor->add(idx, values[pos]); + } else if (pointers[d].empty()) { + // Dense dimension. + for (uint64_t i = 0; i < sizes[d]; i++) { + idx[perm[d]] = i; + toCOO(tensor, perm, idx, pos * sizes[d] + i, d + 1); + } + } else { + // Sparse dimension. + for (uint64_t ii = pointers[d][pos]; ii < pointers[d][pos + 1]; ii++) { + idx[perm[d]] = indices[d][ii]; + toCOO(tensor, perm, idx, ii, d + 1); + } } } private: std::vector sizes; // per-rank dimension sizes + std::vector rev; // "reverse" permutation std::vector> pointers; std::vector> indices; std::vector values; @@ -437,9 +488,12 @@ tensor = openTensor(static_cast(ptr), asize, sizes, perm); \ else if (action == 1) \ tensor = static_cast *>(ptr); \ - else \ + else if (action == 2) \ return SparseTensor::newSparseTensor(asize, sizes, perm); \ - return SparseTensorStorage::newSparseTensor(tensor, sparsity); \ + else \ + return static_cast *>(ptr)->asCOO(perm); \ + return SparseTensorStorage::newSparseTensor(tensor, sparsity, \ + perm); \ } #define IMPL1(RET, NAME, TYPE, LIB) \ @@ -498,9 +552,10 @@ /// Constructs a new sparse tensor. This is the "swiss army knife" /// method for materializing sparse tensors into the computation. /// action -/// 0 : ptr contains filename to read into storage -/// 1 : ptr contains coordinate scheme to assign to storage -/// 2 : returns coordinate scheme to fill (call back later with 1) +/// 0 : ptr contains filename to read into storage +/// 1 : ptr contains coordinate scheme to assign to new storage +/// 2 : returns empty coordinate scheme to fill (call back 1 to setup) +/// 3 : returns coordinate scheme from storage in ptr (call back 1 to convert) void *newSparseTensor(uint8_t *abase, uint8_t *adata, uint64_t aoff, uint64_t asize, uint64_t astride, uint64_t *sbase, uint64_t *sdata, uint64_t soff, uint64_t ssize, diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir @@ -0,0 +1,170 @@ +// RUN: mlir-opt %s \ +// RUN: --sparsification --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#Tensor1 = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed" ], + dimOrdering = affine_map<(i,j,k) -> (i,j,k)> +}> + +#Tensor2 = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed" ], + dimOrdering = affine_map<(i,j,k) -> (j,k,i)> +}> + +#Tensor3 = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed" ], + dimOrdering = affine_map<(i,j,k) -> (k,i,j)> +}> + +// +// Integration test that tests conversions between sparse tensors. +// +module { + // + // Output utility. + // + func @dump(%arg0: memref, %arg1: memref) { + %c0 = constant 0 : index + %d0 = constant 0.0 : f64 + %0 = vector.transfer_read %arg0[%c0], %d0: memref, vector<24xf64> + vector.print %0 : vector<24xf64> + %1 = vector.transfer_read %arg1[%c0], %c0: memref, vector<24xindex> + vector.print %1 : vector<24xindex> + return + } + + // + // Main driver. + // + func @entry() { + %c2 = constant 2 : index + + // + // Initialize a 3-dim dense tensor. + // + %t = constant dense<[ + [ [ 1.0, 2.0, 3.0, 4.0 ], + [ 5.0, 6.0, 7.0, 8.0 ], + [ 9.0, 10.0, 11.0, 12.0 ] ], + [ [ 13.0, 14.0, 15.0, 16.0 ], + [ 17.0, 18.0, 19.0, 20.0 ], + [ 21.0, 22.0, 23.0, 24.0 ] ] + ]> : tensor<2x3x4xf64> + + // + // Convert dense tensor directly to various sparse tensors. + // tensor1: stored as 2x3x4 + // tensor2: stored as 3x4x2 + // tensor3: stored as 4x2x3 + // + %1 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor1> + %2 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor2> + %3 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor3> + + // + // Convert sparse tensor to various sparse tensors. Note that the result + // should always correspond to the direct conversion, since the sparse + // tensor formats have the ability to restore into the original ordering. + // + %a = sparse_tensor.convert %1 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64, #Tensor1> + %b = sparse_tensor.convert %2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64, #Tensor1> + %c = sparse_tensor.convert %3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64, #Tensor1> + %d = sparse_tensor.convert %1 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64, #Tensor2> + %e = sparse_tensor.convert %2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64, #Tensor2> + %f = sparse_tensor.convert %3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64, #Tensor2> + %g = sparse_tensor.convert %1 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64, #Tensor3> + %h = sparse_tensor.convert %2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64, #Tensor3> + %i = sparse_tensor.convert %3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64, #Tensor3> + + // + // Verify direct results. + // + // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ) + // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 ) + // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 ) + // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 ) + // + %10 = sparse_tensor.values %1 : tensor<2x3x4xf64, #Tensor1> to memref + %11 = sparse_tensor.indices %1, %c2 : tensor<2x3x4xf64, #Tensor1> to memref + %12 = sparse_tensor.values %2 : tensor<2x3x4xf64, #Tensor2> to memref + %13 = sparse_tensor.indices %2, %c2 : tensor<2x3x4xf64, #Tensor2> to memref + %14 = sparse_tensor.values %3 : tensor<2x3x4xf64, #Tensor3> to memref + %15 = sparse_tensor.indices %3, %c2 : tensor<2x3x4xf64, #Tensor3> to memref + call @dump(%10, %11) : (memref, memref) -> () + call @dump(%12, %13) : (memref, memref) -> () + call @dump(%14, %15) : (memref, memref) -> () + + // + // Verify sparse conversions into tensor1. + // + // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ) + // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ) + // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ) + // + %20 = sparse_tensor.values %a : tensor<2x3x4xf64, #Tensor1> to memref + %21 = sparse_tensor.indices %a, %c2 : tensor<2x3x4xf64, #Tensor1> to memref + %22 = sparse_tensor.values %b : tensor<2x3x4xf64, #Tensor1> to memref + %23 = sparse_tensor.indices %b, %c2 : tensor<2x3x4xf64, #Tensor1> to memref + %24 = sparse_tensor.values %c : tensor<2x3x4xf64, #Tensor1> to memref + %25 = sparse_tensor.indices %c, %c2 : tensor<2x3x4xf64, #Tensor1> to memref + call @dump(%20, %21) : (memref, memref) -> () + call @dump(%22, %23) : (memref, memref) -> () + call @dump(%24, %25) : (memref, memref) -> () + + // + // Verify sparse conversions into tensor2. + // + // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 ) + // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 ) + // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 ) + // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 ) + // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 ) + // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 ) + // + %30 = sparse_tensor.values %d : tensor<2x3x4xf64, #Tensor2> to memref + %31 = sparse_tensor.indices %d, %c2 : tensor<2x3x4xf64, #Tensor2> to memref + %32 = sparse_tensor.values %e : tensor<2x3x4xf64, #Tensor2> to memref + %33 = sparse_tensor.indices %e, %c2 : tensor<2x3x4xf64, #Tensor2> to memref + %34 = sparse_tensor.values %f : tensor<2x3x4xf64, #Tensor2> to memref + %35 = sparse_tensor.indices %f, %c2 : tensor<2x3x4xf64, #Tensor2> to memref + call @dump(%30, %31) : (memref, memref) -> () + call @dump(%32, %33) : (memref, memref) -> () + call @dump(%34, %35) : (memref, memref) -> () + + // + // Verify sparse conversions into tensor3. + // + // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 ) + // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 ) + // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 ) + // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 ) + // + %40 = sparse_tensor.values %g : tensor<2x3x4xf64, #Tensor3> to memref + %41 = sparse_tensor.indices %g, %c2 : tensor<2x3x4xf64, #Tensor3> to memref + %42 = sparse_tensor.values %h : tensor<2x3x4xf64, #Tensor3> to memref + %43 = sparse_tensor.indices %h, %c2 : tensor<2x3x4xf64, #Tensor3> to memref + %44 = sparse_tensor.values %i : tensor<2x3x4xf64, #Tensor3> to memref + %45 = sparse_tensor.indices %i, %c2 : tensor<2x3x4xf64, #Tensor3> to memref + call @dump(%40, %41) : (memref, memref) -> () + call @dump(%42, %43) : (memref, memref) -> () + call @dump(%44, %45) : (memref, memref) -> () + + return + } +} +