diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -559,121 +559,6 @@ return %rA, %rB, %rC: tensor, tensor, tensor } -//===----------------------------------------------------------------------===// -// Simple loop cases -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func @scf_for_yield_only -func.func @scf_for_yield_only( - %A : tensor {bufferization.writable = false}, - %B : tensor {bufferization.writable = true}, - %lb : index, - %ub : index, - %step : index) - -> (tensor, tensor) -{ - // CHECK: scf.for - // CHECK-NEXT: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} - %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - scf.yield %t : tensor - } - - // CHECK: scf.for - // CHECK-NEXT: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} - %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { - scf.yield %t : tensor - } - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [-1, 1] - return %r0, %r1: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice -func.func @scf_for_with_tensor.insert_slice( - %A : tensor {bufferization.writable = false}, - %B : tensor {bufferization.writable = true}, - %C : tensor<4xf32> {bufferization.writable = false}, - %lb : index, - %ub : index, - %step : index) - -> (tensor, tensor) -{ - // CHECK: scf.for - // scf.for bbArgs are always inplaceable seen from ops inside the body: - // 1. Either the matching tensor is not inplaceable and an alloc occurs - // which makes bbArg inplaceable. - // 2. Or it is already inplaceable and so is bbArg. - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} - // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]} - // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]} - %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) - -> (tensor, tensor) - { - %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor - %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor - scf.yield %ttA, %ttB : tensor, tensor - } - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [-1, 1] - return %r0#0, %r0#1: tensor, tensor -} - -// ----- - -func.func private @some_use(tensor) -> () - -// CHECK-LABEL: func @scf_for_deps -func.func @scf_for_deps( - %A : tensor {bufferization.writable = true}, - %B : tensor {bufferization.writable = true}, - %lb : index, - %ub : index, - %step : index) - -> (tensor) -{ - // %r0 must be out of place because one use of %t in the subsequent production - // of %r1 is read. 
- // CHECK: scf.for - // CHECK-NEXT: call - // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} - // CHECK-NEXT: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} - %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - func.call @some_use(%t) : (tensor) -> () - scf.yield %t : tensor - } - - // %r1 bufferizes inplace fine. - // CHECK: scf.for - // CHECK-NEXT: call - // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} - // CHECK-NEXT: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} - %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - func.call @some_use(%t) : (tensor) -> () - scf.yield %t : tensor - } - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0] - return %r1: tensor -} - // ----- //===----------------------------------------------------------------------===// @@ -1148,465 +1033,6 @@ // ----- -#accesses = [ - affine_map<(i) -> (i)> -] -#trait = { - indexing_maps = #accesses, - iterator_types = ["parallel"] -} - -// CHECK-LABEL: func @reading_scf_for -func.func @reading_scf_for(%t1: tensor {bufferization.writable = true}, - %s: index, %v: vector<5xf32>) -> (tensor, vector<5xf32>) { - - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %cst = arith.constant 0.0 : f32 - - // Write to %t1. - // CHECK: vector.transfer_write - // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"] - %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor - - // Read the old value of %t1 inside the loop via an alias. - // CHECK: scf.for {{.*}} { - %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor, vector<5xf32>) { - // CHECK: tensor.extract_slice - // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"] - %e = tensor.extract_slice %t2[%s][%s][1] : tensor to tensor - - // Read from %t1 via alias %e. - %v2 = vector.transfer_read %e[%s], %cst : tensor, vector<5xf32> - scf.yield %t2, %v2 : tensor, vector<5xf32> - } - // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]} - - // Use %t3 in some way without reading it, so that it does not get DCE'd. - // CHECK: linalg.generic - // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o = linalg.generic #trait outs (%t3 : tensor) { - ^bb(%0: f32) : - linalg.yield %cst : f32 - } -> (tensor) - - return %o, %v3 : tensor, vector<5xf32> -} - -// ----- - -#accesses = [ - affine_map<(i) -> (i)> -] -#trait = { - indexing_maps = #accesses, - iterator_types = ["parallel"] -} - -// CHECK-LABEL: func @non_reading_scf_for -func.func @non_reading_scf_for(%t1: tensor {bufferization.writable = true}, - %s: index, %v: vector<5xf32>) -> (tensor, vector<5xf32>) { - - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %cst = arith.constant 0.0 : f32 - - // Write to %t1. - // CHECK: vector.transfer_write - // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"] - %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor - - // This loop does not read from %t1. It only writes to it. - // CHECK: scf.for - %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor, vector<5xf32>) { - // Write to %t1 via %t2. (Overwrite %t3.) 
- // CHECK: linalg.generic - // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o2 = linalg.generic #trait outs (%t2 : tensor) { - ^bb(%0: f32) : - linalg.yield %cst : f32 - } -> (tensor) - - // Read overwritten value. This is not a read of %t1. - %v2 = vector.transfer_read %o2[%s], %cst : tensor, vector<5xf32> - scf.yield %o2, %v2 : tensor, vector<5xf32> - } - - // Use %t3 in some way without reading it, so that it does not get DCE'd. - // CHECK: linalg.generic - // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o = linalg.generic #trait outs (%t3 : tensor) { - ^bb(%0: f32) : - linalg.yield %cst : f32 - } -> (tensor) - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0, -1] - return %o, %v3 : tensor, vector<5xf32> -} - -// ----- - -//===----------------------------------------------------------------------===// -// scf.if cases -//===----------------------------------------------------------------------===// - -// This example passes analysis, but it fails when bufferizing. -// CHECK-LABEL: func @scf_if_inplace1 -func.func @scf_if_inplace1(%t1: tensor {bufferization.writable = true}, - %t2: tensor {bufferization.writable = true}, - %cond: i1) -> tensor { - %r = scf.if %cond -> (tensor) { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t1 : tensor - } else { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t2 : tensor - } - return %r : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_inplace2 -func.func @scf_if_inplace2(%t1: tensor {bufferization.writable = true}, - %v: vector<5xf32>, %idx: index, - %cond: i1) -> tensor { - %r = scf.if %cond -> (tensor) { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t1 : tensor - } else { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor - scf.yield %t2 : tensor - } - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0] - return %r : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_inplace3 -func.func @scf_if_inplace3(%t1: tensor {bufferization.writable = true}, - %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, - %cond: i1) -> tensor { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] - %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor - %r = scf.if %cond -> (tensor) { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t2 : tensor - } else { - // Writing the same tensor through an alias. This is OK. 
- // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t3 : tensor - } - return %r : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_in_place4 -func.func @scf_if_in_place4(%t1: tensor {bufferization.writable = true}, - %v: vector<5xf32>, %idx: index, - %cond: i1, %cond2: i1) -> (tensor, vector<10xf32>) { - %cst = arith.constant 0.0 : f32 - %r = scf.if %cond -> (tensor) { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t1 : tensor - } else { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t2 : tensor - } - %r_alias = scf.if %cond2 -> (tensor) { - // Reading %r is OK. No conflict. - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %r : tensor - } else { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %r : tensor - } - %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor, vector<10xf32> - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0, -1] - return %r_alias, %v2 : tensor, vector<10xf32> -} - -// ----- - -// CHECK-LABEL: func @scf_if_inplace5 -func.func @scf_if_inplace5(%t1: tensor {bufferization.writable = true}, - %idx: index, %cond: i1) -> tensor { - %r = scf.if %cond -> (tensor) { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] - %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %e : tensor - } else { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] - %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %f : tensor - } - - // Inserting into an equivalent tensor at the same offset. This bufferizes - // inplace. - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"] - %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor into tensor - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0] - return %r2 : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_inplace6 -func.func @scf_if_inplace6(%t1: tensor {bufferization.writable = true}, - %v1: vector<5xf32>, %v2: vector<5xf32>, - %v3: vector<5xf32>, %idx: index, - %cond: i1, %cond2: i1) -> tensor { - // Test nested scf.if ops. 
- %r = scf.if %cond -> (tensor) { - %t2 = scf.if %cond2 -> (tensor) { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t3 : tensor - } else { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t4 : tensor - } - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t2 : tensor - } else { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t3 : tensor - } - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0] - return %r : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_inplace7 -func.func @scf_if_inplace7(%t1: tensor {bufferization.writable = true}, - %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, - %idx2: index, %cond: i1) -> (tensor, vector<5xf32>) { - %cst = arith.constant 0.0 : f32 - %r, %v_r2 = scf.if %cond -> (tensor, vector<5xf32>) { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] - %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]} - scf.yield %t2, %v1 : tensor, vector<5xf32> - } else { - // Writing the same tensor through an alias. - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"] - %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor - // Read the original value of %t1. This requires the write in this branch - // to be out-of-place. But the write in the other branch can still be - // inplace. - %v_r = vector.transfer_read %t1[%idx2], %cst : tensor, vector<5xf32> - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]} - scf.yield %t3, %v_r : tensor, vector<5xf32> - } - return %r, %v_r2 : tensor, vector<5xf32> -} - -// ----- - -// CHECK-LABEL: func @scf_if_out_of_place1a -func.func @scf_if_out_of_place1a(%t1: tensor {bufferization.writable = true}, - %idx: index, %idx2: index, - %cond: i1) -> tensor { - %r = scf.if %cond -> (tensor) { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] - %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %e : tensor - } else { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t1 : tensor - } - - // Reading from and writing to the same tensor via different args. This is a - // conflict. 
- // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"] - %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor into tensor - return %r2 : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_out_of_place1b -func.func @scf_if_out_of_place1b(%t1: tensor {bufferization.writable = true}, - %idx: index, %idx2: index, %idx3: index, - %cond: i1) -> tensor { - %r = scf.if %cond -> (tensor) { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] - %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %e : tensor - } else { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] - %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %f : tensor - } - - // Reading from and writing to the same tensor via different args. This is a - // conflict. In contrast to scf_if_out_of_place1a, the fact that %r aliases - // with %t1 is only detected when analyzing the tensor.extract_slices. That's - // why the tensor.insert_slice is inplace and the two extract_slices are - // out-of-place. - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"] - %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor into tensor - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0] - return %r2 : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_out_of_place1c -func.func @scf_if_out_of_place1c(%t1: tensor {bufferization.writable = true}, - %idx: index, %idx2: index, %cond: i1) -> tensor { - %r = scf.if %cond -> (tensor) { - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] - %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %e : tensor - } else { - // TODO: This one could bufferize inplace, but the analysis is too restrictive. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] - %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor to tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %f : tensor - } - - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"] - %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor into tensor - - // CHECK: return - // CHECK-SAME: __equivalent_func_args__ = [0] - return %r2 : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_out_of_place2 -func.func @scf_if_out_of_place2(%t1: tensor {bufferization.writable = true}, - %v: vector<5xf32>, %idx: index, - %cond: i1) -> (tensor, vector<10xf32>) { - %cst = arith.constant 0.0 : f32 - %r = scf.if %cond -> (tensor) { - scf.yield %t1 : tensor - } else { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"] - %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t2 : tensor - } - - // Read the old value of %t1. Forces the transfer_write to bufferize - // out-of-place. 
- %v2 = vector.transfer_read %t1[%idx], %cst : tensor, vector<10xf32> - return %r, %v2 : tensor, vector<10xf32> -} - -// ----- - -// CHECK-LABEL: func @scf_if_out_of_place3 -func.func @scf_if_out_of_place3(%t1: tensor {bufferization.writable = true}, - %v: vector<5xf32>, %idx: index, - %cond: i1, %cond2: i1) -> (tensor, vector<10xf32>) { - %cst = arith.constant 0.0 : f32 - %r = scf.if %cond -> (tensor) { - scf.yield %t1 : tensor - } else { - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"] - %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t2 : tensor - } - %t1_alias = scf.if %cond2 -> (tensor) { - // scf.yield bufferizes to a read. That is a conflict in this example. - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t1 : tensor - } else { - // CHECK: scf.yield - // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} - scf.yield %t1 : tensor - } - %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor, vector<10xf32> - return %r, %v2 : tensor, vector<10xf32> -} - -// ----- - // CHECK-LABEL: func @some_use func.func @some_use(%A : tensor {bufferization.writable = true}, %v : vector<5xf32>) -> (tensor) { @@ -1817,30 +1243,3 @@ return %r0 : tensor } - -// ----- - -// CHECK-LABEL: func @write_to_same_tensor_in_loop_in_place( -func.func @write_to_same_tensor_in_loop_in_place( - %A : tensor {linalg.inplaceable = true}, - %lb : index, %ub : index, %step : index, %sz: index) - -> (tensor) -{ - // CHECK: scf.for {{.*}} { - %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - %B = linalg.init_tensor [%sz] : tensor - %i2 = arith.index_cast %i : index to i32 - %i3 = arith.sitofp %i2 : i32 to f32 - // The tensor.insert is in-place because the %B is defined inside the loop. - // CHECK: tensor.insert - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]} - %B2 = tensor.insert %i3 into %B[%i] : tensor - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} - %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor into tensor - scf.yield %A2 : tensor - } - // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} - - return %r0 : tensor -} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -286,142 +286,12 @@ return %r0: tensor } -//===----------------------------------------------------------------------===// -// Simple loop cases -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> - -// CHECK-LABEL: func @scf_for_yield_only( -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref -// CHECK-SAME: ) -> memref { -func.func @scf_for_yield_only( - %A : tensor {bufferization.writable = false}, - %B : tensor {bufferization.writable = true}, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc - // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]] - - // The first scf.for remains but just turns into dead code. 
- %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - scf.yield %t : tensor - } - - // The second scf.for remains but just turns into dead code. - %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { - scf.yield %t : tensor - } - - // CHECK: return %[[ALLOC_FOR_A]] : memref - // CHECK-NOT: dealloc - return %r0, %r1: tensor, tensor -} - -// ----- - -// Ensure that the function bufferizes without error. This tests pre-order -// traversal of scf.for loops during bufferization. No need to check the IR, -// just want to make sure that it does not crash. - -// CHECK-LABEL: func @nested_scf_for -func.func @nested_scf_for(%A : tensor {bufferization.writable = true}, - %v : vector<5xf32>) -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c10 = arith.constant 10 : index - %r1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%B = %A) -> tensor { - %r2 = scf.for %j = %c0 to %c10 step %c1 iter_args(%C = %B) -> tensor { - %w = vector.transfer_write %v, %C[%c0] : vector<5xf32>, tensor - scf.yield %w : tensor - } - scf.yield %r2 : tensor - } - return %r1 : tensor -} - -// ----- - -// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> - -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref -// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref -// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> -func.func @scf_for_with_tensor.insert_slice( - %A : tensor {bufferization.writable = false}, - %B : tensor {bufferization.writable = true}, - %C : tensor<4xf32> {bufferization.writable = false}, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc - // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]] - - // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1] - // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1] - - // CHECK: scf.for {{.*}} - // CHECK-NOT: iter_args - %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) - -> (tensor, tensor) - { - // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA - // CHECK: memref.copy %[[C]], %[[svA]] - %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor - - // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B - // CHECK: memref.copy %[[C]], %[[svB]] - %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor - - // CHECK-NOT: scf.yield - scf.yield %ttA, %ttB : tensor, tensor - } - - // CHECK: return %[[ALLOC_FOR_A]] : memref - return %r0#0, %r0#1: tensor, tensor -} - // ----- //===----------------------------------------------------------------------===// // Cross function boundary cases. //===----------------------------------------------------------------------===// -// CHECK-LABEL: func @execute_region_with_conflict( -// CHECK-SAME: %[[m1:.*]]: memref {bufferization.writable = "true"}) - -> (f32, tensor, f32) -{ - %f1 = arith.constant 0.0 : f32 - %idx = arith.constant 7 : index - - // scf.execute_region is canonicalized away after bufferization. So just the - // memref.store is left over. 
- - // CHECK: %[[alloc:.*]] = memref.alloc - // CHECK: memref.copy %[[m1]], %[[alloc]] - // CHECK: memref.store %{{.*}}, %[[alloc]][%{{.*}}] - %0, %1, %2 = scf.execute_region -> (f32, tensor, f32) { - %t2 = tensor.insert %f1 into %t1[%idx] : tensor - scf.yield %f1, %t2, %f1 : f32, tensor, f32 - } - - // CHECK: %[[casted:.*]] = memref.cast %[[alloc]] - // CHECK: %[[load:.*]] = memref.load %[[m1]] - %3 = tensor.extract %t1[%idx] : tensor - - // CHECK: return %{{.*}}, %[[casted]], %[[load]] : f32, memref, f32 - return %0, %1, %3 : f32, tensor, f32 -} - -// ----- - // CHECK: func @matmul( // CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<128x256xf32> // CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<256x192xf32> @@ -536,80 +406,6 @@ return %rA : tensor } -// ----- - -// CHECK-LABEL: func @scf_if_inplace( -// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref, %[[v:.*]]: vector -func.func @scf_if_inplace(%cond: i1, - %t1: tensor {bufferization.writable = true}, - %v: vector<5xf32>, %idx: index) -> tensor { - - // CHECK: scf.if %[[cond]] { - // CHECK-NEXT: } else { - // CHECK-NEXT: vector.transfer_write %[[v]], %[[t1]] - // CHECK-NEXT: } - // CHECK-NEXT: return - %r = scf.if %cond -> (tensor) { - scf.yield %t1 : tensor - } else { - %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor - scf.yield %t2 : tensor - } - return %r : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_inside_scf_for -// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index -// CHECK: scf.for %{{.*}} = %[[c0]] to %[[c10]] step %[[c1]] { -// CHECK: scf.if %{{.*}} { -// CHECK: } else { -// CHECK: vector.transfer_write -// CHECK: } -// CHECK: } -func.func @scf_if_inside_scf_for( - %t1: tensor {bufferization.writable = true}, - %v: vector<5xf32>, %idx: index, - %cond: i1) - -> tensor -{ - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c10 = arith.constant 10 : index - %r = scf.for %iv = %c0 to %c10 step %c1 iter_args(%bb = %t1) -> (tensor) { - %r2 = scf.if %cond -> (tensor) { - scf.yield %bb : tensor - } else { - %t2 = vector.transfer_write %v, %bb[%idx] : vector<5xf32>, tensor - scf.yield %t2 : tensor - } - scf.yield %r2 : tensor - } - return %r : tensor -} - -// ----- - -// CHECK-LABEL: func @scf_if_non_equiv_yields( -// CHECK-SAME: %[[cond:.*]]: i1, %[[A:.*]]: memref<{{.*}}>, %[[B:.*]]: memref<{{.*}}>) -> memref<{{.*}}> -func.func @scf_if_non_equiv_yields( - %b : i1, - %A : tensor<4xf32> {bufferization.writable = false}, - %B : tensor<4xf32> {bufferization.writable = false}) - -> tensor<4xf32> -{ - // CHECK: %[[r:.*]] = arith.select %[[cond]], %[[A]], %[[B]] - %r = scf.if %b -> (tensor<4xf32>) { - scf.yield %A : tensor<4xf32> - } else { - scf.yield %B : tensor<4xf32> - } - // CHECK: return %[[r]] - return %r: tensor<4xf32> -} // ----- @@ -823,126 +619,3 @@ } return %5: tensor } - -// ----- - -// Note: This bufferization is inefficient, but it bufferizes correctly. 
- -// CHECK-LABEL: func @scf_execute_region_yield_non_equivalent( -// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}}) -// CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]] -// CHECK: memref.dealloc %[[alloc]] -// CHECK: %[[r:.*]] = memref.load %[[clone]][%{{.*}}] -// CHECK: memref.dealloc %[[clone]] -// CHECK: return %[[r]] -func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 { - %r = scf.execute_region -> (tensor) { - %t2 = linalg.init_tensor [%i] : tensor - scf.yield %t2 : tensor - } - %f = tensor.extract %r[%j] : tensor - return %f : f32 -} - -// ----- - -// Note: This bufferizes to inefficient code, but bufferization should not see -// such IR in the first place. The iter_arg would canonicalize away. This test -// case is just to ensure that the bufferization generates correct code. - -// CHECK-LABEL: func @scf_for_yield_non_equivalent( -// CHECK-SAME: %[[t:.*]]: memref, %lb : index, %ub : index, %step : index) -> tensor { - %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor { - scf.yield %t : tensor - } - - return %r : tensor -} - -// ----- - -// Note: This bufferizes to inefficient code, but bufferization should not see -// such IR in the first place. The iter_arg would canonicalize away. This test -// case is just to ensure that the bufferization generates correct code. - -// CHECK-LABEL: func @scf_for_yield_allocation( -// CHECK-SAME: %[[t:.*]]: memref, %lb : index, %ub : index, - %step : index) -> tensor { - %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor { - %t2 = linalg.init_tensor [%i] : tensor - scf.yield %t2 : tensor - } - - return %r : tensor -} - -// ----- - -// TODO: The scf.yield could bufferize to 1 alloc and 2 copies (instead of -// 2 allocs and 2 copies). - -// CHECK-LABEL: func @scf_for_swapping_yields( -// CHECK-SAME: %[[A:.*]]: memref, %[[B:.*]]: memref -func.func @scf_for_swapping_yields( - %A : tensor, %B : tensor {bufferization.writable = true}, - %C : tensor<4xf32>, %lb : index, %ub : index, %step : index) - -> (f32, f32) -{ -// CHECK-DAG: %[[clone1:.*]] = bufferization.clone %[[A]] -// CHECK-DAG: %[[clone2:.*]] = bufferization.clone %[[B]] -// CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[clone1]], %[[iter2:.*]] = %[[clone2]]) - %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) - -> (tensor, tensor) - { -// CHECK: %[[sv1:.*]] = memref.subview %[[iter1]] -// CHECK: memref.copy %{{.*}}, %[[sv1]] - %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor -// CHECK: %[[sv2:.*]] = memref.subview %[[iter2]] -// CHECK: memref.copy %{{.*}}, %[[sv2]] - %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor - -// CHECK: %[[alloc2:.*]] = memref.alloc(%{{.*}}) -// CHECK: memref.copy %[[iter2]], %[[alloc2]] -// CHECK: memref.dealloc %[[iter2]] -// CHECK: %[[alloc1:.*]] = memref.alloc(%{{.*}}) -// CHECK: memref.copy %[[iter1]], %[[alloc1]] -// CHECK: memref.dealloc %[[iter1]] -// CHECK: %[[casted1:.*]] = memref.cast %[[alloc1]] -// CHECK: %[[casted2:.*]] = memref.cast %[[alloc2]] -// CHECK: scf.yield %[[casted2]], %[[casted1]] - // Yield tensors in different order. 
- scf.yield %ttB, %ttA : tensor, tensor - } - -// CHECK: %[[r0:.*]] = memref.load %[[for]]#0 -// CHECK: memref.dealloc %[[for]]#0 -// CHECK: %[[r1:.*]] = memref.load %[[for]]#1 -// CHECK: memref.dealloc %[[for]]#1 - %f0 = tensor.extract %r0#0[%step] : tensor - %f1 = tensor.extract %r0#1[%step] : tensor -// CHECK: return %[[r0]], %[[r1]] - return %f0, %f1: f32, f32 -} - diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -0,0 +1,601 @@ +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s + +// Run fuzzer with different seeds. +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null + +// CHECK-LABEL: func @scf_for_yield_only +func.func @scf_for_yield_only( + %A : tensor {bufferization.writable = false}, + %B : tensor {bufferization.writable = true}, + %lb : index, + %ub : index, + %step : index) + -> (tensor, tensor) +{ + // CHECK: scf.for + // CHECK-NEXT: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} + %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + scf.yield %t : tensor + } + + // CHECK: scf.for + // CHECK-NEXT: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} + %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { + scf.yield %t : tensor + } + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] + return %r0, %r1: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +func.func @scf_for_with_tensor.insert_slice( + %A : tensor {bufferization.writable = false}, + %B : tensor {bufferization.writable = true}, + %C : tensor<4xf32> {bufferization.writable = false}, + %lb : index, + %ub : index, + %step : index) + -> (tensor, tensor) +{ + // CHECK: scf.for + // scf.for bbArgs are always inplaceable seen from ops inside the body: + // 1. Either the matching tensor is not inplaceable and an alloc occurs + // which makes bbArg inplaceable. + // 2. Or it is already inplaceable and so is bbArg. 
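+ // As a rough sketch (illustrative SSA names, not checked here), case 1
+ // bufferizes to something like
+ //   %allocA = memref.alloc(...)
+ //   memref.copy %A, %allocA
+ // and the loop then writes into a subview of %allocA, while case 2 writes
+ // directly into a subview of %B. The companion one-shot-bufferize.mlir test
+ // added in this patch checks that output in detail.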
+ // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]} + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor + %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor + scf.yield %ttA, %ttB : tensor, tensor + } + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] + return %r0#0, %r0#1: tensor, tensor +} + +// ----- + +func.func private @some_use(tensor) -> () + +// CHECK-LABEL: func @scf_for_deps +func.func @scf_for_deps( + %A : tensor {bufferization.writable = true}, + %B : tensor {bufferization.writable = true}, + %lb : index, + %ub : index, + %step : index) + -> (tensor) +{ + // %r0 must be out of place because one use of %t in the subsequent production + // of %r1 is read. + // CHECK: scf.for + // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} + // CHECK-NEXT: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} + %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + func.call @some_use(%t) : (tensor) -> () + scf.yield %t : tensor + } + + // %r1 bufferizes inplace fine. + // CHECK: scf.for + // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} + // CHECK-NEXT: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} + %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + func.call @some_use(%t) : (tensor) -> () + scf.yield %t : tensor + } + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0] + return %r1: tensor +} + +// ----- + +#accesses = [ + affine_map<(i) -> (i)> +] +#trait = { + indexing_maps = #accesses, + iterator_types = ["parallel"] +} + +// CHECK-LABEL: func @reading_scf_for +func.func @reading_scf_for(%t1: tensor {bufferization.writable = true}, + %s: index, %v: vector<5xf32>) -> (tensor, vector<5xf32>) { + + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %cst = arith.constant 0.0 : f32 + + // Write to %t1. + // CHECK: vector.transfer_write + // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"] + %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor + + // Read the old value of %t1 inside the loop via an alias. + // CHECK: scf.for {{.*}} { + %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor, vector<5xf32>) { + // CHECK: tensor.extract_slice + // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"] + %e = tensor.extract_slice %t2[%s][%s][1] : tensor to tensor + + // Read from %t1 via alias %e. + %v2 = vector.transfer_read %e[%s], %cst : tensor, vector<5xf32> + scf.yield %t2, %v2 : tensor, vector<5xf32> + } + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]} + + // Use %t3 in some way without reading it, so that it does not get DCE'd. 
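+ // The payload below ignores its block argument and yields a constant, so
+ // the outs operand is written but never read.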
+ // CHECK: linalg.generic + // CHECK-SAME: __inplace_operands_attr__ = ["true"] + %o = linalg.generic #trait outs (%t3 : tensor) { + ^bb(%0: f32) : + linalg.yield %cst : f32 + } -> (tensor) + + return %o, %v3 : tensor, vector<5xf32> +} + +// ----- + +#accesses = [ + affine_map<(i) -> (i)> +] +#trait = { + indexing_maps = #accesses, + iterator_types = ["parallel"] +} + +// CHECK-LABEL: func @non_reading_scf_for +func.func @non_reading_scf_for(%t1: tensor {bufferization.writable = true}, + %s: index, %v: vector<5xf32>) -> (tensor, vector<5xf32>) { + + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %cst = arith.constant 0.0 : f32 + + // Write to %t1. + // CHECK: vector.transfer_write + // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"] + %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor + + // This loop does not read from %t1. It only writes to it. + // CHECK: scf.for + %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor, vector<5xf32>) { + // Write to %t1 via %t2. (Overwrite %t3.) + // CHECK: linalg.generic + // CHECK-SAME: __inplace_operands_attr__ = ["true"] + %o2 = linalg.generic #trait outs (%t2 : tensor) { + ^bb(%0: f32) : + linalg.yield %cst : f32 + } -> (tensor) + + // Read overwritten value. This is not a read of %t1. + %v2 = vector.transfer_read %o2[%s], %cst : tensor, vector<5xf32> + scf.yield %o2, %v2 : tensor, vector<5xf32> + } + + // Use %t3 in some way without reading it, so that it does not get DCE'd. + // CHECK: linalg.generic + // CHECK-SAME: __inplace_operands_attr__ = ["true"] + %o = linalg.generic #trait outs (%t3 : tensor) { + ^bb(%0: f32) : + linalg.yield %cst : f32 + } -> (tensor) + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0, -1] + return %o, %v3 : tensor, vector<5xf32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// scf.if cases +//===----------------------------------------------------------------------===// + +// This example passes analysis, but it fails when bufferizing. 
+// CHECK-LABEL: func @scf_if_inplace1 +func.func @scf_if_inplace1(%t1: tensor {bufferization.writable = true}, + %t2: tensor {bufferization.writable = true}, + %cond: i1) -> tensor { + %r = scf.if %cond -> (tensor) { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t1 : tensor + } else { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t2 : tensor + } + return %r : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_inplace2 +func.func @scf_if_inplace2(%t1: tensor {bufferization.writable = true}, + %v: vector<5xf32>, %idx: index, + %cond: i1) -> tensor { + %r = scf.if %cond -> (tensor) { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t1 : tensor + } else { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor + scf.yield %t2 : tensor + } + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0] + return %r : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_inplace3 +func.func @scf_if_inplace3(%t1: tensor {bufferization.writable = true}, + %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, + %cond: i1) -> tensor { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] + %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + %r = scf.if %cond -> (tensor) { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t2 : tensor + } else { + // Writing the same tensor through an alias. This is OK. + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t3 : tensor + } + return %r : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_in_place4 +func.func @scf_if_in_place4(%t1: tensor {bufferization.writable = true}, + %v: vector<5xf32>, %idx: index, + %cond: i1, %cond2: i1) -> (tensor, vector<10xf32>) { + %cst = arith.constant 0.0 : f32 + %r = scf.if %cond -> (tensor) { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t1 : tensor + } else { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t2 : tensor + } + %r_alias = scf.if %cond2 -> (tensor) { + // Reading %r is OK. No conflict. 
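+ // %r is not written again after this point, so forwarding it through both
+ // branches cannot conflict with the earlier in-place write.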
+ // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %r : tensor + } else { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %r : tensor + } + %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor, vector<10xf32> + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0, -1] + return %r_alias, %v2 : tensor, vector<10xf32> +} + +// ----- + +// CHECK-LABEL: func @scf_if_inplace5 +func.func @scf_if_inplace5(%t1: tensor {bufferization.writable = true}, + %idx: index, %cond: i1) -> tensor { + %r = scf.if %cond -> (tensor) { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] + %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %e : tensor + } else { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] + %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %f : tensor + } + + // Inserting into an equivalent tensor at the same offset. This bufferizes + // inplace. + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"] + %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor into tensor + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0] + return %r2 : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_inplace6 +func.func @scf_if_inplace6(%t1: tensor {bufferization.writable = true}, + %v1: vector<5xf32>, %v2: vector<5xf32>, + %v3: vector<5xf32>, %idx: index, + %cond: i1, %cond2: i1) -> tensor { + // Test nested scf.if ops. 
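+ // All three vector.transfer_writes below write into %t1, and the original
+ // value of %t1 is never read afterwards, so every write can bufferize in
+ // place.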
+ %r = scf.if %cond -> (tensor) { + %t2 = scf.if %cond2 -> (tensor) { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t3 : tensor + } else { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t4 : tensor + } + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t2 : tensor + } else { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t3 : tensor + } + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0] + return %r : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_inplace7 +func.func @scf_if_inplace7(%t1: tensor {bufferization.writable = true}, + %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, + %idx2: index, %cond: i1) -> (tensor, vector<5xf32>) { + %cst = arith.constant 0.0 : f32 + %r, %v_r2 = scf.if %cond -> (tensor, vector<5xf32>) { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] + %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]} + scf.yield %t2, %v1 : tensor, vector<5xf32> + } else { + // Writing the same tensor through an alias. + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"] + %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor + // Read the original value of %t1. This requires the write in this branch + // to be out-of-place. But the write in the other branch can still be + // inplace. + %v_r = vector.transfer_read %t1[%idx2], %cst : tensor, vector<5xf32> + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]} + scf.yield %t3, %v_r : tensor, vector<5xf32> + } + return %r, %v_r2 : tensor, vector<5xf32> +} + +// ----- + +// CHECK-LABEL: func @scf_if_out_of_place1a +func.func @scf_if_out_of_place1a(%t1: tensor {bufferization.writable = true}, + %idx: index, %idx2: index, + %cond: i1) -> tensor { + %r = scf.if %cond -> (tensor) { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] + %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %e : tensor + } else { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t1 : tensor + } + + // Reading from and writing to the same tensor via different args. This is a + // conflict. 
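+ // Because one branch yields %t1 itself, %r may alias %t1; writing the slice
+ // into %t1 in place could clobber data still read through %r, so the
+ // destination operand is bufferized out of place ("false" below).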
+ // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"] + %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor into tensor + return %r2 : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_out_of_place1b +func.func @scf_if_out_of_place1b(%t1: tensor {bufferization.writable = true}, + %idx: index, %idx2: index, %idx3: index, + %cond: i1) -> tensor { + %r = scf.if %cond -> (tensor) { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] + %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %e : tensor + } else { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] + %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %f : tensor + } + + // Reading from and writing to the same tensor via different args. This is a + // conflict. In contrast to scf_if_out_of_place1a, the fact that %r aliases + // with %t1 is only detected when analyzing the tensor.extract_slices. That's + // why the tensor.insert_slice is inplace and the two extract_slices are + // out-of-place. + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"] + %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor into tensor + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0] + return %r2 : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_out_of_place1c +func.func @scf_if_out_of_place1c(%t1: tensor {bufferization.writable = true}, + %idx: index, %idx2: index, %cond: i1) -> tensor { + %r = scf.if %cond -> (tensor) { + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] + %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %e : tensor + } else { + // TODO: This one could bufferize inplace, but the analysis is too restrictive. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"] + %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %f : tensor + } + + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"] + %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor into tensor + + // CHECK: return + // CHECK-SAME: __equivalent_func_args__ = [0] + return %r2 : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_out_of_place2 +func.func @scf_if_out_of_place2(%t1: tensor {bufferization.writable = true}, + %v: vector<5xf32>, %idx: index, + %cond: i1) -> (tensor, vector<10xf32>) { + %cst = arith.constant 0.0 : f32 + %r = scf.if %cond -> (tensor) { + scf.yield %t1 : tensor + } else { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"] + %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t2 : tensor + } + + // Read the old value of %t1. Forces the transfer_write to bufferize + // out-of-place. 
+ %v2 = vector.transfer_read %t1[%idx], %cst : tensor, vector<10xf32> + return %r, %v2 : tensor, vector<10xf32> +} + +// ----- + +// CHECK-LABEL: func @scf_if_out_of_place3 +func.func @scf_if_out_of_place3(%t1: tensor {bufferization.writable = true}, + %v: vector<5xf32>, %idx: index, + %cond: i1, %cond2: i1) -> (tensor, vector<10xf32>) { + %cst = arith.constant 0.0 : f32 + %r = scf.if %cond -> (tensor) { + scf.yield %t1 : tensor + } else { + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"] + %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t2 : tensor + } + %t1_alias = scf.if %cond2 -> (tensor) { + // scf.yield bufferizes to a read. That is a conflict in this example. + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t1 : tensor + } else { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + scf.yield %t1 : tensor + } + %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor, vector<10xf32> + return %r, %v2 : tensor, vector<10xf32> +} + +// ----- + +// CHECK-LABEL: func @write_to_same_tensor_in_loop_in_place( +func.func @write_to_same_tensor_in_loop_in_place( + %A : tensor {linalg.inplaceable = true}, + %lb : index, %ub : index, %step : index, %sz: index) + -> (tensor) +{ + // CHECK: scf.for {{.*}} { + %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + %B = linalg.init_tensor [%sz] : tensor + %i2 = arith.index_cast %i : index to i32 + %i3 = arith.sitofp %i2 : i32 to f32 + // The tensor.insert is in-place because the %B is defined inside the loop. + // CHECK: tensor.insert + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]} + %B2 = tensor.insert %i3 into %B[%i] : tensor + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} + %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor into tensor + scf.yield %A2 : tensor + } + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} + + return %r0 : tensor +} diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -0,0 +1,330 @@ +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s + +// Run fuzzer with different seeds. +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null +// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null + +// Test bufferization using memref types that have no layout map. 
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs fully-dynamic-layout-maps=0 bufferize-function-boundaries" -split-input-file -o /dev/null + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @scf_for_yield_only( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref, +// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: ) -> memref { +func.func @scf_for_yield_only( + %A : tensor {bufferization.writable = false}, + %B : tensor {bufferization.writable = true}, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc + // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]] + + // The first scf.for remains but just turns into dead code. + %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + scf.yield %t : tensor + } + + // The second scf.for remains but just turns into dead code. + %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { + scf.yield %t : tensor + } + + // CHECK: return %[[ALLOC_FOR_A]] : memref + // CHECK-NOT: dealloc + return %r0, %r1: tensor, tensor +} + +// ----- + +// Ensure that the function bufferizes without error. This tests pre-order +// traversal of scf.for loops during bufferization. No need to check the IR, +// just want to make sure that it does not crash. + +// CHECK-LABEL: func @nested_scf_for +func.func @nested_scf_for(%A : tensor {bufferization.writable = true}, + %v : vector<5xf32>) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %r1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%B = %A) -> tensor { + %r2 = scf.for %j = %c0 to %c10 step %c1 iter_args(%C = %B) -> tensor { + %w = vector.transfer_write %v, %C[%c0] : vector<5xf32>, tensor + scf.yield %w : tensor + } + scf.yield %r2 : tensor + } + return %r1 : tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func.func @scf_for_with_tensor.insert_slice( + %A : tensor {bufferization.writable = false}, + %B : tensor {bufferization.writable = true}, + %C : tensor<4xf32> {bufferization.writable = false}, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc + // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]] + + // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1] + // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1] + + // CHECK: scf.for {{.*}} + // CHECK-NOT: iter_args + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA + // CHECK: memref.copy %[[C]], %[[svA]] + %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor + + // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B + // CHECK: memref.copy %[[C]], %[[svB]] + %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor + + // CHECK-NOT: scf.yield + scf.yield %ttA, %ttB : tensor, tensor + } + + // CHECK: return %[[ALLOC_FOR_A]] : memref + return %r0#0, %r0#1: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @execute_region_with_conflict( +// CHECK-SAME: %[[m1:.*]]: memref {bufferization.writable = "true"}) + -> (f32, tensor, f32) +{ + 
%f1 = arith.constant 0.0 : f32 + %idx = arith.constant 7 : index + + // scf.execute_region is canonicalized away after bufferization. So just the + // memref.store is left over. + + // CHECK: %[[alloc:.*]] = memref.alloc + // CHECK: memref.copy %[[m1]], %[[alloc]] + // CHECK: memref.store %{{.*}}, %[[alloc]][%{{.*}}] + %0, %1, %2 = scf.execute_region -> (f32, tensor, f32) { + %t2 = tensor.insert %f1 into %t1[%idx] : tensor + scf.yield %f1, %t2, %f1 : f32, tensor, f32 + } + + // CHECK: %[[casted:.*]] = memref.cast %[[alloc]] + // CHECK: %[[load:.*]] = memref.load %[[m1]] + %3 = tensor.extract %t1[%idx] : tensor + + // CHECK: return %{{.*}}, %[[casted]], %[[load]] : f32, memref, f32 + return %0, %1, %3 : f32, tensor, f32 +} + +// ----- + +// CHECK-LABEL: func @scf_if_inplace( +// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref, %[[v:.*]]: vector +func.func @scf_if_inplace(%cond: i1, + %t1: tensor {bufferization.writable = true}, + %v: vector<5xf32>, %idx: index) -> tensor { + + // CHECK: scf.if %[[cond]] { + // CHECK-NEXT: } else { + // CHECK-NEXT: vector.transfer_write %[[v]], %[[t1]] + // CHECK-NEXT: } + // CHECK-NEXT: return + %r = scf.if %cond -> (tensor) { + scf.yield %t1 : tensor + } else { + %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor + scf.yield %t2 : tensor + } + return %r : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_inside_scf_for +// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index +// CHECK: scf.for %{{.*}} = %[[c0]] to %[[c10]] step %[[c1]] { +// CHECK: scf.if %{{.*}} { +// CHECK: } else { +// CHECK: vector.transfer_write +// CHECK: } +// CHECK: } +func.func @scf_if_inside_scf_for( + %t1: tensor {bufferization.writable = true}, + %v: vector<5xf32>, %idx: index, + %cond: i1) + -> tensor +{ + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %r = scf.for %iv = %c0 to %c10 step %c1 iter_args(%bb = %t1) -> (tensor) { + %r2 = scf.if %cond -> (tensor) { + scf.yield %bb : tensor + } else { + %t2 = vector.transfer_write %v, %bb[%idx] : vector<5xf32>, tensor + scf.yield %t2 : tensor + } + scf.yield %r2 : tensor + } + return %r : tensor +} + +// ----- + +// CHECK-LABEL: func @scf_if_non_equiv_yields( +// CHECK-SAME: %[[cond:.*]]: i1, %[[A:.*]]: memref<{{.*}}>, %[[B:.*]]: memref<{{.*}}>) -> memref<{{.*}}> +func.func @scf_if_non_equiv_yields( + %b : i1, + %A : tensor<4xf32> {bufferization.writable = false}, + %B : tensor<4xf32> {bufferization.writable = false}) + -> tensor<4xf32> +{ + // CHECK: %[[r:.*]] = arith.select %[[cond]], %[[A]], %[[B]] + %r = scf.if %b -> (tensor<4xf32>) { + scf.yield %A : tensor<4xf32> + } else { + scf.yield %B : tensor<4xf32> + } + // CHECK: return %[[r]] + return %r: tensor<4xf32> +} + +// ----- + +// Note: This bufferization is inefficient, but it bufferizes correctly. 
+
+// CHECK-LABEL: func @scf_execute_region_yield_non_equivalent(
+// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}})
+// CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]]
+// CHECK: memref.dealloc %[[alloc]]
+// CHECK: %[[r:.*]] = memref.load %[[clone]][%{{.*}}]
+// CHECK: memref.dealloc %[[clone]]
+// CHECK: return %[[r]]
+func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 {
+  %r = scf.execute_region -> (tensor) {
+    %t2 = linalg.init_tensor [%i] : tensor
+    scf.yield %t2 : tensor
+  }
+  %f = tensor.extract %r[%j] : tensor
+  return %f : f32
+}
+
+// -----
+
+// Note: This bufferizes to inefficient code, but bufferization should not see
+// such IR in the first place. The iter_arg would canonicalize away. This test
+// case is just to ensure that the bufferization generates correct code.
+
+// CHECK-LABEL: func @scf_for_yield_non_equivalent(
+// CHECK-SAME: %[[t:.*]]: memref
+func.func @scf_for_yield_non_equivalent(
+    %t: tensor, %lb : index, %ub : index, %step : index) -> tensor {
+  %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor {
+    scf.yield %t : tensor
+  }
+
+  return %r : tensor
+}
+
+// -----
+
+// Note: This bufferizes to inefficient code, but bufferization should not see
+// such IR in the first place. The iter_arg would canonicalize away. This test
+// case is just to ensure that the bufferization generates correct code.
+
+// CHECK-LABEL: func @scf_for_yield_allocation(
+// CHECK-SAME: %[[t:.*]]: memref
+func.func @scf_for_yield_allocation(
+    %t: tensor, %lb : index, %ub : index,
+    %step : index) -> tensor {
+  %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor {
+    %t2 = linalg.init_tensor [%i] : tensor
+    scf.yield %t2 : tensor
+  }
+
+  return %r : tensor
+}
+
+// -----
+
+// TODO: The scf.yield could bufferize to 1 alloc and 2 copies (instead of
+// 2 allocs and 2 copies).
+
+// CHECK-LABEL: func @scf_for_swapping_yields(
+// CHECK-SAME: %[[A:.*]]: memref, %[[B:.*]]: memref
+func.func @scf_for_swapping_yields(
+    %A : tensor, %B : tensor {bufferization.writable = true},
+    %C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
+  -> (f32, f32)
+{
+// CHECK-DAG: %[[clone1:.*]] = bufferization.clone %[[A]]
+// CHECK-DAG: %[[clone2:.*]] = bufferization.clone %[[B]]
+// CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[clone1]], %[[iter2:.*]] = %[[clone2]])
+  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
+      -> (tensor, tensor)
+  {
+// CHECK: %[[sv1:.*]] = memref.subview %[[iter1]]
+// CHECK: memref.copy %{{.*}}, %[[sv1]]
+    %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor
+// CHECK: %[[sv2:.*]] = memref.subview %[[iter2]]
+// CHECK: memref.copy %{{.*}}, %[[sv2]]
+    %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor
+
+// CHECK: %[[alloc2:.*]] = memref.alloc(%{{.*}})
+// CHECK: memref.copy %[[iter2]], %[[alloc2]]
+// CHECK: memref.dealloc %[[iter2]]
+// CHECK: %[[alloc1:.*]] = memref.alloc(%{{.*}})
+// CHECK: memref.copy %[[iter1]], %[[alloc1]]
+// CHECK: memref.dealloc %[[iter1]]
+// CHECK: %[[casted1:.*]] = memref.cast %[[alloc1]]
+// CHECK: %[[casted2:.*]] = memref.cast %[[alloc2]]
+// CHECK: scf.yield %[[casted2]], %[[casted1]]
+    // Yield tensors in different order.
+ scf.yield %ttB, %ttA : tensor, tensor + } + +// CHECK: %[[r0:.*]] = memref.load %[[for]]#0 +// CHECK: memref.dealloc %[[for]]#0 +// CHECK: %[[r1:.*]] = memref.load %[[for]]#1 +// CHECK: memref.dealloc %[[for]]#1 + %f0 = tensor.extract %r0#0[%step] : tensor + %f1 = tensor.extract %r0#1[%step] : tensor +// CHECK: return %[[r0]], %[[r1]] + return %f0, %f1: f32, f32 +}