Summary (text before the first "Index:" header is ignored by patch(1)):
  * TensorOps.td: drops `let hasFolder = 1;` from an op definition
    (presumably tensor.parallel_insert_slice, judging by the C++ hunk below).
  * TensorOps.cpp: deletes ParallelInsertSliceOp::fold, which only forwarded
    to foldInsertOp(*this, operands).
  * canonicalize.mlir: adds @dont_fold_parallel_insert_slice, whose CHECK lines
    expect the op to survive canonicalization with its constant offsets and
    strides rewritten to static values.

NOTE(review): this patch was recovered from a copy in which line breaks,
indentation and some angle-bracket arguments were lost. The fold signature
follows the standard MLIR multi-result fold hook. The `results.add<...>`
context line, the `tensor<?x?xf32>` context line and all leading whitespace
are reconstructions -- confirm against the tree, or apply with
`patch -l` / fuzz if context does not match.

Index: mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
===================================================================
--- mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -1207,7 +1207,6 @@
   ];
 
   let hasCanonicalizer = 1;
-  let hasFolder = 1;
   let hasVerifier = 1;
 }
 
Index: mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
===================================================================
--- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -2365,12 +2365,6 @@
 };
 } // namespace
 
-LogicalResult
-ParallelInsertSliceOp::fold(ArrayRef<Attribute> operands,
-                            SmallVectorImpl<OpFoldResult> &results) {
-  return foldInsertOp(*this, operands);
-}
-
 void ParallelInsertSliceOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
   results.add<InsertSliceOpConstantArgumentFolder<ParallelInsertSliceOp>,
Index: mlir/test/Dialect/Tensor/canonicalize.mlir
===================================================================
--- mlir/test/Dialect/Tensor/canonicalize.mlir
+++ mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -1466,3 +1466,24 @@
   }
   return %2 : tensor<?x?xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func.func @dont_fold_parallel_insert_slice(
+// CHECK-SAME: %[[arg0:[0-9a-z]*]]: tensor<1x5xf32>,
+// CHECK-SAME: %[[arg1:[0-9a-z]*]]: tensor<1x5xf32>)
+func.func @dont_fold_parallel_insert_slice(
+    %arg0 : tensor<1x5xf32>, %arg1: tensor<1x5xf32>) -> tensor<1x5xf32>
+{
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  // CHECK: scf.foreach_thread () in () -> (tensor<1x5xf32>) {
+  // CHECK-NEXT: scf.foreach_thread.perform_concurrently {
+  // CHECK-NEXT: tensor.parallel_insert_slice %[[arg0]] into %[[arg1]][0, 0] [1, 5] [1, 1] : tensor<1x5xf32> into tensor<1x5xf32>
+  %2 = scf.foreach_thread () in () -> (tensor<1x5xf32>) {
+    scf.foreach_thread.perform_concurrently {
+      tensor.parallel_insert_slice %arg0 into %arg1[%c0, %c0] [1, 5] [%c1, %c1] : tensor<1x5xf32> into tensor<1x5xf32>
+    }
+  }
+  return %2 : tensor<1x5xf32>
+}