diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -60,7 +60,8 @@
   /// Constructor: take an array of tensors inputs, on which the generated loops
   /// will iterate on. The index of the tensor in the array is also the
   /// tensor id (tid) used in related functions.
-  explicit SparseTensorLoopEmitter(ValueRange tensors);
+  explicit SparseTensorLoopEmitter(ValueRange tensors,
+                                   bool isLastOutput = false);
 
   ///
   /// Core functions.
@@ -140,6 +141,7 @@
   std::vector<std::vector<Value>> idxBuffer; // to_indices
   std::vector<Value> valBuffer;              // to_value
+  bool isLastOutput; // Whether the last tensor is the output tensor.
 
   std::vector<Operation *> loopStack;
   // TODO: not yet used, it should track the current level for each tensor
   // to help eliminate `dim` paramters from above APIs.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -44,15 +44,19 @@
 // Sparse tensor loop emitter class implementations
 //===----------------------------------------------------------------------===//
 
-SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors)
+SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,
+                                                 bool isLastOutput)
     : tensors(tensors.begin(), tensors.end()), dims(tensors.size()),
       pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()),
       sizes(tensors.size()), ptrBuffer(tensors.size()),
-      idxBuffer(tensors.size()), valBuffer(tensors.size()), loopStack(),
-      curLv(tensors.size(), 0) {
+      idxBuffer(tensors.size()), valBuffer(tensors.size()),
+      isLastOutput(isLastOutput), loopStack(), curLv(tensors.size(), 0) {
   for (size_t i = 0, e = tensors.size(); i < e; i++) {
     auto t = tensors[i];
-    auto rtp = t.getType().cast<RankedTensorType>();
+    auto rtp = t.getType().dyn_cast<RankedTensorType>();
+    if (!rtp) // a scalar (0-dimensional tensor)
+      continue;
+
     auto rank = static_cast<size_t>(rtp.getRank());
     auto enc = getSparseTensorEncoding(rtp);
     if (enc)
@@ -100,7 +104,14 @@
         ptrBuffer[t][d] = builder.create<ToPointersOp>(loc, ptrTp, tensor, dim);
         idxBuffer[t][d] = builder.create<ToIndicesOp>(loc, indTp, tensor, dim);
       } else if (isSingletonDim(dims[t][d])) {
-        llvm_unreachable("TODO: not implemented yet");
+        // Singleton dimension, fetch indices.
+        auto indTp =
+            MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
+        auto dim = builder.getIndexAttr(d);
+        idxBuffer[t][d] = builder.create<ToIndicesOp>(loc, indTp, tensor, dim);
+      } else {
+        // Dense dimension, nothing to fetch.
+        assert(isDenseDim(dims[t][d]));
       }
 
       // Find upper bound in current dimension.
@@ -116,9 +127,11 @@
     if (!enc) {
       // Non-annotated dense tensors.
       auto denseTp = MemRefType::get(shape, elementType);
-      // This is not the output tensor
-      valBuffer[t] =
-          builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
+      if (isLastOutput && t == tensors.size() - 1)
+        llvm_unreachable("TODO: not yet handled");
+      else
+        valBuffer[t] =
+            builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
     } else {
       // Annotated sparse tensors.
       auto dynShape = {ShapedType::kDynamicSize};
@@ -137,10 +150,12 @@
   // We can not re-enter the same level.
   assert(!coord[tid][dim]);
   Value step = constantIndex(builder, loc, 1);
-  bool isCompressed = isCompressedDim(dims[tid][dim]);
-  assert(isDenseDim(dims[tid][dim]) || isCompressedDim(dims[tid][dim]));
+  auto dimType = dims[tid][dim];
+  bool isSparse = isCompressedDim(dimType) || isSingletonDim(dimType);
+  assert(isDenseDim(dimType) || isCompressedDim(dimType) ||
+         isSingletonDim(dimType));
 
-  Value lo = isCompressed ? pidxs[tid][dim] : constantIndex(builder, loc, 0);
+  Value lo = isSparse ? pidxs[tid][dim] : constantIndex(builder, loc, 0);
   Value hi = highs[tid][dim];
 
   // TODO: support reduction.
@@ -153,7 +168,7 @@
   Operation *loop = forOp;
 
   assert(iv);
-  if (isCompressed) {
+  if (isSparse) {
     pidxs[tid][dim] = iv;
     // Generating a load on the indices array yields the coordinate.
     Value ptr = idxBuffer[tid][dim];
@@ -191,26 +206,33 @@
   // TODO: generate loop iteration on output tensor based on the shape
   // instead of pointer/indices arrays.
   assert(dims[tid].size() > dim);
+  auto dimType = dims[tid][dim];
 
-  if (isDenseDim(dims[tid][dim]))
+  if (isDenseDim(dimType))
     return false;
 
   // Either the first dimension, or the previous dimension has been set.
   assert(dim == 0 || pidxs[tid][dim - 1]);
-  if (isCompressedDim(dims[tid][dim])) {
+  Value c0 = constantIndex(builder, loc, 0);
+  Value c1 = constantIndex(builder, loc, 1);
+  if (isCompressedDim(dimType)) {
     Value ptr = ptrBuffer[tid][dim];
-    Value c1 = constantIndex(builder, loc, 1);
-    Value pLo = dim == 0 ? constantIndex(builder, loc, 0) : pidxs[tid][dim - 1];
+
+    Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
     Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
 
     pidxs[tid][dim] = genIndexLoad(builder, loc, ptr, pLo);
     highs[tid][dim] = genIndexLoad(builder, loc, ptr, pHi);
-
     return true;
   }
+  if (isSingletonDim(dimType)) {
+    Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
+    Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
 
-  if (isSingletonDim(dims[tid][dim]))
-    llvm_unreachable("TODO: not implemented yet");
+    pidxs[tid][dim] = pLo;
+    highs[tid][dim] = pHi;
+    return true;
+  }
 
   llvm_unreachable("Unrecognizable dimesion type!");
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_codegen_foreach.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_codegen_foreach.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_codegen_foreach.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_codegen_foreach.mlir
@@ -16,6 +16,15 @@
   dimOrdering = affine_map<(i,j) -> (j,i)>
 }>
 
+#SortedCOO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ]
+}>
+
+#SortedCOOPerm = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ],
+  dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
 module {
 
   /// uses foreach operator to print coords and values.
@@ -49,6 +58,26 @@
     return
   }
 
+  func.func @foreach_print_4(%arg0: tensor<2x2xf64, #SortedCOO>) {
+    sparse_tensor.foreach in %arg0 : tensor<2x2xf64, #SortedCOO> do {
+      ^bb0(%1: index, %2: index, %v: f64) :
+        vector.print %1: index
+        vector.print %2: index
+        vector.print %v: f64
+    }
+    return
+  }
+
+  func.func @foreach_print_5(%arg0: tensor<2x2xf64, #SortedCOOPerm>) {
+    sparse_tensor.foreach in %arg0 : tensor<2x2xf64, #SortedCOOPerm> do {
+      ^bb0(%1: index, %2: index, %v: f64) :
+        vector.print %1: index
+        vector.print %2: index
+        vector.print %v: f64
+    }
+    return
+  }
+
   //
   // Main driver.
   //
@@ -67,6 +96,8 @@
     %s1 = sparse_tensor.convert %src : tensor<2x2xf64> to tensor<2x2xf64, #Row>
     %s2 = sparse_tensor.convert %src : tensor<2x2xf64> to tensor<2x2xf64, #CSR>
     %s3 = sparse_tensor.convert %src : tensor<2x2xf64> to tensor<2x2xf64, #DCSC>
+    %s4 = sparse_tensor.convert %src : tensor<2x2xf64> to tensor<2x2xf64, #SortedCOO>
+    %s5 = sparse_tensor.convert %src : tensor<2x2xf64> to tensor<2x2xf64, #SortedCOOPerm>
     // CHECK: 0
     // CHECK-NEXT: 0
     // CHECK-NEXT: 1
@@ -106,10 +137,38 @@
     // CHECK-NEXT: 1
     // CHECK-NEXT: 6
     call @foreach_print_3(%s3) : (tensor<2x2xf64, #DCSC>) -> ()
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 5
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 6
+    call @foreach_print_4(%s4) : (tensor<2x2xf64, #SortedCOO>) -> ()
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 5
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 6
+    call @foreach_print_5(%s5) : (tensor<2x2xf64, #SortedCOOPerm>) -> ()
 
     bufferization.dealloc_tensor %s1 : tensor<2x2xf64, #Row>
     bufferization.dealloc_tensor %s2 : tensor<2x2xf64, #CSR>
    bufferization.dealloc_tensor %s3 : tensor<2x2xf64, #DCSC>
+    bufferization.dealloc_tensor %s4 : tensor<2x2xf64, #SortedCOO>
+    bufferization.dealloc_tensor %s5 : tensor<2x2xf64, #SortedCOOPerm>
 
     return
   }
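
Usage sketch (illustrative only, not part of the patch): since the new parameter defaults to false, existing call sites that construct the emitter from a plain ValueRange keep their current behavior, and a lowering pattern opts in explicitly when the trailing tensor should be treated as the output. The `op` below stands for an assumed surrounding operation and is not defined by this change.

// Hypothetical call site: hand all tensor operands to the emitter and mark
// the last one as the output; only the constructor signature comes from the
// patch, everything else here is assumed.
SparseTensorLoopEmitter loopEmitter(op->getOperands(), /*isLastOutput=*/true);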