diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 
 using namespace mlir;
 using namespace mlir::sparse_tensor;
@@ -206,7 +207,14 @@
     Type elementType = rtp.getElementType();
     if (!enc) {
       // Non-annotated dense tensors.
-      auto denseTp = MemRefType::get(shape, elementType);
+      BaseMemRefType denseTp = MemRefType::get(shape, elementType);
+
+      // TODO: if we unconditionally use a fully dynamic layout here, it
+      // breaks some vectorization passes, which require static stride = 1.
+      // Is it possible to call the vectorization pass after bufferization?
+      if (llvm::isa_and_nonnull<tensor::ExtractSliceOp>(tensor.getDefiningOp()))
+        denseTp = bufferization::getMemRefTypeWithFullyDynamicLayout(rtp);
+
       Value denseVal =
           builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
       // Dense outputs need special handling.
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
@@ -69,25 +69,24 @@
     call @foreach_print_slice(%a) : (tensor<4x4xf64, #CSR_SLICE>) -> ()
-    // FIXME: investigate why a tensor copy is inserted for this slice
-//    %dense = tensor.extract_slice %sa[1, 1][4, 4][1, 2] : tensor<8x8xf64> to
-//                                                          tensor<4x4xf64>
-//    %b = sparse_tensor.convert %dense : tensor<4x4xf64> to tensor<4x4xf64, #CSR>
-//    // Foreach on sparse tensor instead of slice they should yield the same result.
-//    //
-//    // C_HECK-NEXT: 1
-//    // C_HECK-NEXT: 0
-//    // C_HECK-NEXT: 2.3
-//    // C_HECK-NEXT: 2
-//    // C_HECK-NEXT: 3
-//    // C_HECK-NEXT: 1
-//    // C_HECK-NEXT: 3
-//    // C_HECK-NEXT: 2
-//    // C_HECK-NEXT: 2.1
-//    //
-//    call @foreach_print_non_slice(%b) : (tensor<4x4xf64, #CSR>) -> ()
-//    bufferization.dealloc_tensor %b : tensor<4x4xf64, #CSR>
+    %dense = tensor.extract_slice %sa[1, 1][4, 4][1, 2] : tensor<8x8xf64> to
+                                                          tensor<4x4xf64>
+    %b = sparse_tensor.convert %dense : tensor<4x4xf64> to tensor<4x4xf64, #CSR>
+    // Foreach on the sparse tensor instead of the slice should yield the same result.
+    //
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 2.3
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: 3
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 3
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: 2.1
+    //
+    call @foreach_print_non_slice(%b) : (tensor<4x4xf64, #CSR>) -> ()
+    bufferization.dealloc_tensor %b : tensor<4x4xf64, #CSR>
     bufferization.dealloc_tensor %tmp : tensor<8x8xf64, #CSR>
     return
   }
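
Note on why the layout change matters, as a sketch rather than a definitive account (the concrete strides and offset below are derived from the slice parameters in the test, not stated in the patch): bufferizing

    %dense = tensor.extract_slice %sa[1, 1][4, 4][1, 2] : tensor<8x8xf64> to tensor<4x4xf64>

produces a strided view into the 8x8 row-major buffer, with offset 1*8 + 1 = 9 and strides [8, 2]. An identity-layout memref<4x4xf64> cannot describe that view, which is why bufferization previously had to materialize the tensor copy mentioned in the removed FIXME. The fully dynamic layout returned by bufferization::getMemRefTypeWithFullyDynamicLayout,

    memref<4x4xf64, strided<[?, ?], offset: ?>>

can alias the view directly; the trade-off, per the TODO, is that vectorization passes requiring a static stride of 1 can no longer assume it.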