diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -541,8 +541,7 @@ // equivalent. if (config.alwaysIncludeLeaves) result.insert(value); - } else { - workingSet.insert(a.opOperand->get()); + continue; } if (config.followInPlaceOnly && !isInPlace(*a.opOperand)) { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir @@ -231,3 +231,31 @@ %0 = tensor.insert_slice %t0 into %t1 [2] [5] [1] : tensor<5xf32> into tensor<10xf32> return %0 : tensor<10xf32> } + +// ----- + +// This is a regression test. Make sure that there is no crash. 
+
+// CHECK-LABEL: func.func @regression_eliminate_equivalent_only(
+func.func @regression_eliminate_equivalent_only(%sz: index, %p: index, %t0: tensor<?x16xi8>) -> tensor<?x16xi8> {
+  %c0 = arith.constant 0 : index
+  %c8 = arith.constant 8 : index
+  %c16 = arith.constant 16 : index
+  %27 = tensor.empty(%sz) : tensor<?x8xi32>  // A slice of this value is re-inserted into it inside the loop.
+  %extracted_slice = tensor.extract_slice %27[0, 0] [%p, 8] [1, 1] : tensor<?x8xi32> to tensor<?x8xi32>
+  %28 = scf.for %arg4 = %c0 to %c16 step %c8 iter_args(%arg5 = %t0) -> (tensor<?x16xi8>) {  // %arg4 = 0, 8
+    %inserted_slice = tensor.insert_slice %extracted_slice into %27[0, 0] [%sz, 8] [1, 1] : tensor<?x8xi32> into tensor<?x8xi32>
+    %extracted_slice_2 = tensor.extract_slice %arg5[%p, %p] [%sz, 8] [1, 1] : tensor<?x16xi8> to tensor<?x8xi8>
+    %32 = linalg.generic
+      {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+       iterator_types = ["parallel", "parallel"]}
+      ins(%inserted_slice : tensor<?x8xi32>) outs(%extracted_slice_2 : tensor<?x8xi8>) {
+    ^bb0(%in: i32, %out: i8):
+      %tr = arith.trunci %in : i32 to i8
+      linalg.yield %tr : i8
+    } -> tensor<?x8xi8>
+    %inserted_slice_3 = tensor.insert_slice %32 into %arg5[%p, %arg4] [%sz, 8] [1, 1] : tensor<?x8xi8> into tensor<?x16xi8>
+    scf.yield %inserted_slice_3 : tensor<?x16xi8>
+  }
+  func.return %28 : tensor<?x16xi8>
+}