diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
@@ -903,6 +903,31 @@
         continue;
     }
 
+    // If uConflictingWrite is an InsertSliceOp...
+    if (auto insertSliceOp = dyn_cast<tensor::InsertSliceOp>(conflictingWritingOp))
+      // As an example, consider the following IR.
+      //
+      // %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace= [true] }
+      // %1 = linalg.fill %cst, %0 {inplace= [true] }
+      // %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
+      //     {inplace= [true] }
+      // %3 = vector.transfer_read %1, %cst
+      //
+      // In the above example:
+      // uRead             = OpOperand 0 (%1) of vector.transfer_read
+      // uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
+      // lastWrite         = %1
+      //
+      // This is not a conflict because the InsertSliceOp overwrites the
+      // memory segment of %1 with the exact same data. (Effectively, there
+      // is no memory write here.)
+      if (uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
+          aliasInfo.areEquivalentBufferizedValues(uRead->get(),
+                                                  insertSliceOp.source()) &&
+          hasMatchingExtractSliceOp(aliasInfo, insertSliceOp.source(),
+                                    insertSliceOp))
+        continue;
+
     // All requirements are met. Conflict found!
     LDBG("CONFLICT CONFIRMED!\n\n");
     return true;
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
@@ -213,6 +213,72 @@
 
 // -----
 
+// CHECK-LABEL: @read_of_matching_insert_slice_source
+func @read_of_matching_insert_slice_source(
+    %A : tensor<?xf32> {linalg.inplaceable = true}, %idx : index, %idx2 : index)
+  -> (tensor<?xf32>, vector<5xf32>)
+{
+  %cst = arith.constant 0.0 : f32
+  %cst2 = arith.constant 1.0 : f32
+
+  // CHECK: tensor.extract_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+
+  // CHECK: linalg.fill
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
+
+  // CHECK: tensor.insert_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
+
+  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>
+  return %2, %3 : tensor<?xf32>, vector<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
+func @read_of_matching_insert_slice_source_interleaved(
+    %A : tensor<?xf32> {linalg.inplaceable = true}, %idx : index, %idx2 : index,
+    %idx3 : index)
+  -> (tensor<?xf32>, vector<5xf32>)
+{
+  %cst = arith.constant 0.0 : f32
+  %cst2 = arith.constant 1.0 : f32
+
+  // CHECK: tensor.extract_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+
+  // CHECK: linalg.fill
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
+
+  // CHECK: tensor.insert_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
+
+  // CHECK: tensor.extract_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>
+
+  // CHECK: linalg.fill
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %5 = linalg.fill(%cst, %4) : f32, tensor<?xf32> -> tensor<?xf32>
+
+  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>
+
+  // CHECK: tensor.insert_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
+
+  return %6, %3 : tensor<?xf32>, vector<5xf32>
+}
+
+// -----
+
 // CHECK-LABEL: func @extract_slice_linalg_readonly_use
 func @extract_slice_linalg_readonly_use(
     %A : tensor<?x?xf32>,
@@ -946,6 +1012,28 @@
 
 // -----
 
+// CHECK-LABEL: func @extract_once_insert_twice
+func @extract_once_insert_twice(
+    %arg2: tensor<62x90xf32> {linalg.inplaceable = true})
+  -> (tensor<62x90xf32>)
+{
+  // CHECK: tensor.extract_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
+
+  // CHECK: tensor.insert_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
+
+  // CHECK: tensor.insert_slice
+  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
+
+  return %15 : tensor<62x90xf32>
+}
+
+// -----
+
 #accesses = [
   affine_map<(i) -> (i)>
 ]