diff --git a/mlir/test/Transforms/buffer-deallocation.mlir b/mlir/test/Transforms/buffer-deallocation.mlir --- a/mlir/test/Transforms/buffer-deallocation.mlir +++ b/mlir/test/Transforms/buffer-deallocation.mlir @@ -15,8 +15,6 @@ // Since bb1 does not contain an adequate alloc and the alloc in bb2 is not // moved to bb0, we need to insert allocs and copies. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranch func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2 @@ -24,18 +22,14 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -44,12 +38,12 @@ // CHECK-NEXT: linalg.copy // CHECK-NEXT: br ^bb3(%[[ALLOC0]] // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOC2:.*]] = alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: dealloc %[[ALLOC1]] // CHECK-NEXT: br ^bb3(%[[ALLOC2]] -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc // CHECK-NEXT: return @@ -68,8 +62,6 @@ // appropriate shape dimensions. The copy buffer deallocation will be applied // to %2 in block bb3. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranchDynamicType func @condBranchDynamicType( %arg0: i1, @@ -81,18 +73,14 @@ br ^bb3(%arg1 : memref) ^bb2(%0: index): %1 = alloc(%0) : memref - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref) - outs(%1: memref) { + test.buffer_based in(%arg1: memref) out(%1: memref) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%1 : memref) ^bb3(%2: memref): - "linalg.copy"(%2, %arg2) : (memref, memref) -> () + test.copy(%2, %arg2) : (memref, memref) return } @@ -103,14 +91,14 @@ // CHECK-NEXT: br ^bb3(%[[ALLOC0]] // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) // CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]]) -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[DIM1:.*]] = dim %[[ALLOC1]] // CHECK-NEXT: %[[ALLOC2:.*]] = alloc(%[[DIM1]]) // CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]]) // CHECK-NEXT: dealloc %[[ALLOC1]] // CHECK-NEXT: br ^bb3 // CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}}) -// CHECK: linalg.copy(%[[ALLOC3]], +// CHECK: test.copy(%[[ALLOC3]], // CHECK-NEXT: dealloc %[[ALLOC3]] // CHECK-NEXT: return @@ -136,8 +124,6 @@ // buffer deallocations will be applied to %2 in block bb5 and to %3 in block // bb6. Furthermore, there should be no copy inserted for %4. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranchDynamicTypeNested func @condBranchDynamicTypeNested( %arg0: i1, @@ -149,14 +135,10 @@ br ^bb6(%arg1 : memref) ^bb2(%0: index): %1 = alloc(%0) : memref - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref) - outs(%1: memref) { + test.buffer_based in(%arg1: memref) out(%1: memref) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb3, ^bb4 ^bb3: @@ -168,7 +150,7 @@ ^bb6(%3: memref): br ^bb7(%3 : memref) ^bb7(%4: memref): - "linalg.copy"(%4, %arg2) : (memref, memref) -> () + test.copy(%4, %arg2) : (memref, memref) return } @@ -180,7 +162,7 @@ // CHECK-NEXT: br ^bb6 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) // CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]]) -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: cond_br // CHECK: ^bb3: // CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}}) @@ -195,7 +177,7 @@ // CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}}) // CHECK-NEXT: br ^bb7(%[[ALLOC4]]{{.*}}) // CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}}) -// CHECK: linalg.copy(%[[ALLOC5]], +// CHECK: test.copy(%[[ALLOC5]], // CHECK-NEXT: dealloc %[[ALLOC4]] // CHECK-NEXT: return @@ -226,25 +208,19 @@ // exit block after CopyOp since %1 is an alias for %0 and %arg1. Furthermore, // we have to insert a copy and an alloc in the beginning of the function. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @criticalEdge func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -252,11 +228,11 @@ // CHECK-NEXT: linalg.copy // CHECK-NEXT: cond_br // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOC2:.*]] = alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: dealloc %[[ALLOC1]] -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc // CHECK-NEXT: return @@ -271,25 +247,19 @@ // BufferDeallocation expected behavior: It only inserts a DeallocOp at the // exit block after CopyOp since %1 is an alias for %0 and %arg1. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @invCriticalEdge func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -306,22 +276,16 @@ // bb3 <- Initial position of the second AllocOp // BufferDeallocation expected behavior: It only inserts two missing // DeallocOps in the exit block. %5 is an alias for %0. Therefore, the -// DeallocOp for %0 should occur after the last GenericOp. The Dealloc for %7 -// should happen after the CopyOp. - -#map0 = affine_map<(d0) -> (d0)> +// DeallocOp for %0 should occur after the last BufferBasedOp. The Dealloc for +// %7 should happen after the CopyOp. // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -332,25 +296,21 @@ br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): %7 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%5: memref<2xf32>) - outs(%7: memref<2xf32>) { + test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: dealloc %[[FIRST_ALLOC]] -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[SECOND_ALLOC]] // CHECK-NEXT: return @@ -365,19 +325,13 @@ // BufferDeallocation expected behavior: It only inserts a missing DeallocOp // in the exit block since %5 or %6 are the latest aliases of %0. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -387,12 +341,12 @@ ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - "linalg.copy"(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[FIRST_ALLOC]] // CHECK-NEXT: return @@ -410,19 +364,13 @@ // BufferDeallocation expected behavior: Two missing DeallocOps should be // inserted in the exit block. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -437,25 +385,21 @@ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): %9 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%7: memref<2xf32>) - outs(%9: memref<2xf32>) { + test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: dealloc %[[FIRST_ALLOC]] -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[SECOND_ALLOC]] // CHECK-NEXT: return @@ -463,41 +407,31 @@ // Test Case: Dead operations in a single block. // BufferDeallocation expected behavior: It only inserts the two missing -// DeallocOps after the last GenericOp. - -#map0 = affine_map<(d0) -> (d0)> +// DeallocOps after the last BufferBasedOp. 
// CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } %1 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } return } // CHECK: (%[[ARG0:.*]]: {{.*}}) // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}}outs(%[[FIRST_ALLOC]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}}out(%[[FIRST_ALLOC]] // CHECK: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins -// CHECK-SAME: (%[[FIRST_ALLOC]]{{.*}}outs(%[[SECOND_ALLOC]] +// CHECK-NEXT: test.buffer_based in +// CHECK-SAME: (%[[FIRST_ALLOC]]{{.*}}out(%[[SECOND_ALLOC]] // CHECK: dealloc // CHECK-NEXT: dealloc // CHECK-NEXT: return @@ -515,8 +449,6 @@ // inserted in the respective block of the allocs. The copy is freed in the exit // block. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc func @moving_alloc_and_inserting_missing_dealloc( %cond: i1, @@ -525,30 +457,22 @@ cond_br %cond, ^bb1, ^bb2 ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^exit(%0 : memref<2xf32>) ^bb2: %1 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): - "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } @@ -556,20 +480,20 @@ // CHECK: ^bb1 // CHECK: ^bb1 // CHECK: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOC1:.*]] = alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: dealloc %[[ALLOC0]] // CHECK-NEXT: br ^bb3(%[[ALLOC1]] // CHECK-NEXT: ^bb2 // CHECK-NEXT: %[[ALLOC2:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOC3:.*]] = alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: dealloc %[[ALLOC2]] // CHECK-NEXT: br ^bb3(%[[ALLOC3]] // CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}}) -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[ALLOC4]] // CHECK-NEXT: return @@ -585,8 +509,6 @@ // BufferDeallocation expected behavior: The existing DeallocOp should be // moved to exit block. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @moving_invalid_dealloc_op_complex func @moving_invalid_dealloc_op_complex( %cond: i1, @@ -597,25 +519,21 @@ ^bb1: br ^exit(%arg0 : memref<2xf32>) ^bb2: - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } dealloc %1 : memref<2xf32> br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): - "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() // CHECK-NEXT: cond_br -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[ALLOC0]] // CHECK-NEXT: return @@ -623,28 +541,22 @@ // Test Case: Inserting missing DeallocOp in a single block. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @inserting_missing_dealloc_simple func @inserting_missing_dealloc_simple( %arg0 : memref<2xf32>, %arg1: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } - "linalg.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[ALLOC0]] // ----- @@ -652,39 +564,31 @@ // Test Case: Moving invalid DeallocOp (there is a user after deallocation) in a // single block. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @moving_invalid_dealloc_op func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } dealloc %0 : memref<2xf32> - "linalg.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc %[[ALLOC0]] // ----- -// Test Case: Nested regions - This test defines a GenericOp inside the region -// of another GenericOp. -// BufferDeallocation expected behavior: The AllocOp of inner GenericOp should -// remain inside the region of outer GenericOp and it should insert the missing -// DeallocOp in the same region. The missing DeallocOp should be inserted after -// Linalg.Copy. - -#map0 = affine_map<(d0) -> (d0)> +// Test Case: Nested regions - This test defines a BufferBasedOp inside the +// region of another BufferBasedOp. +// BufferDeallocation expected behavior: The AllocOp of inner BufferBasedOp +// should remain inside the region of outer BufferBasedOp and it should insert +// the missing DeallocOp in the same region. 
The missing DeallocOp should be +// inserted after test.copy. // CHECK-LABEL: func @nested_regions_and_cond_branch func @nested_regions_and_cond_branch( @@ -696,28 +600,20 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %1 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}}) @@ -726,16 +622,16 @@ // CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]]) // CHECK: ^[[BB2]]: // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ARG1]]{{.*}}outs(%[[ALLOC1]] +// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]] // CHECK: %[[ALLOC2:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ARG1]]{{.*}}outs(%[[ALLOC2]] +// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]] // CHECK: dealloc %[[ALLOC2]] // CHECK-NEXT: %{{.*}} = exp // CHECK: %[[ALLOC3:.*]] = alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC3]]) // CHECK-NEXT: dealloc %[[ALLOC1]] // CHECK: ^[[BB3:.*]]({{.*}}): -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc // ----- @@ -743,9 +639,7 @@ // Test Case: buffer deallocation escaping // BufferDeallocation expected behavior: It must not dealloc %arg1 and %x // since they are operands of return operation and should escape from -// deallocating. It should dealloc %y after linalg.copy. - -#map0 = affine_map<(d0) -> (d0)> +// deallocating. It should dealloc %y after test.copy. 
// CHECK-LABEL: func @memref_in_function_results
func @memref_in_function_results(
  %arg0: memref<5xf32>,
  %arg1: memref<10xf32>,
  %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
  %x = alloc() : memref<15xf32>
  %y = alloc() : memref<5xf32>
-  linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
-    ins(%arg0: memref<5xf32>)
-    outs(%y: memref<5xf32>) {
+  test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>) {
  ^bb0(%arg3: f32, %arg4: f32):
    %2 = exp %arg3 : f32
-    linalg.yield %2 : f32
+    test.buffer_tensor_yield %2 : f32
  }
-  linalg.copy(%y, %arg2) : memref<5xf32>, memref<5xf32>
+  test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>)
  return %arg1, %x : memref<10xf32>, memref<15xf32>
}
// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
// CHECK: %[[X:.*]] = alloc()
// CHECK: %[[Y:.*]] = alloc()
-// CHECK: linalg.copy
+// CHECK: test.copy
// CHECK: dealloc %[[Y]]
// CHECK: return %[[ARG1]], %[[X]]
@@ -877,21 +769,19 @@
  %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
    memref<64x4xf32, offset: 0, strides: [4, 1]> to memref
-  "linalg.copy"(%1, %arg2) :
-    (memref, memref) -> ()
+  test.copy(%1, %arg2) :
+    (memref, memref)
  return
}
// CHECK-NEXT: %[[ALLOC:.*]] = alloc()
// CHECK-NEXT: subview
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: test.copy
// CHECK-NEXT: dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

-#map0 = affine_map<(d0) -> (d0)>
-
// Test Case: In the presence of AllocaOps only the AllocOps have to be freed.
// Therefore, all allocas are not handled.
@@ -902,18 +792,14 @@
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = alloca() : memref<2xf32>
-  linalg.generic {
-    indexing_maps = [#map0, #map0],
-    iterator_types = ["parallel"]}
-    ins(%arg1: memref<2xf32>)
-    outs(%0: memref<2xf32>) {
+  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %tmp1 = exp %gen1_arg0 : f32
-    linalg.yield %tmp1 : f32
+    test.buffer_tensor_yield %tmp1 : f32
  }
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
-  "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
@@ -921,13 +807,11 @@
// CHECK: %[[ALLOCA:.*]] = alloca()
// CHECK: br ^bb3(%[[ALLOCA:.*]])
// CHECK-NEXT: ^bb3
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: test.copy
// CHECK-NEXT: return

// -----

-#map0 = affine_map<(d0) -> (d0)>
-
// Test Case: In the presence of AllocaOps only the AllocOps have to be freed.
// Therefore, all allocas are not handled. In this case, only alloc %0 has a
// dealloc.
@@ -935,14 +819,10 @@ // CHECK-LABEL: func @ifElseAlloca func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -953,45 +833,35 @@ br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): %7 = alloca() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%5: memref<2xf32>) - outs(%7: memref<2xf32>) { + test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOCA:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: dealloc %[[ALLOC]] -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: return // ----- -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @ifElseNestedAlloca func @ifElseNestedAlloca( %arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloca() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -1006,31 +876,25 @@ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): %9 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%7: memref<2xf32>) - outs(%9: memref<2xf32>) { + test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOCA:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic -// CHECK: linalg.copy +// CHECK-NEXT: test.buffer_based +// CHECK: test.copy // CHECK-NEXT: dealloc %[[ALLOC]] // CHECK-NEXT: return // ----- -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca func @nestedRegionsAndCondBranchAlloca( %arg0: i1, @@ -1041,28 +905,20 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %1 = alloca() : memref<2xf32> - 
linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}}) @@ -1072,15 +928,15 @@ // CHECK-NEXT: linalg.copy // CHECK: ^[[BB2]]: // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ARG1]]{{.*}}outs(%[[ALLOC1]] +// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]] // CHECK: %[[ALLOCA:.*]] = alloca() -// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ARG1]]{{.*}}outs(%[[ALLOCA]] +// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]] // CHECK: %{{.*}} = exp // CHECK: %[[ALLOC2:.*]] = alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: dealloc %[[ALLOC1]] // CHECK: ^[[BB3:.*]]({{.*}}): -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: dealloc // ----- @@ -1127,7 +983,7 @@ %3 = alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -1145,7 +1001,7 @@ // CHECK: dealloc %[[ALLOC3]] // CHECK: scf.yield %[[ALLOC4]] // CHECK: } -// CHECK: linalg.copy(%[[ALLOC2]], %arg4) +// CHECK: test.copy(%[[ALLOC2]], %arg4) // CHECK-NEXT: dealloc %[[ALLOC2]] // ----- @@ -1174,7 +1030,7 @@ } scf.yield %3 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -1184,7 +1040,7 @@ // CHECK: scf.yield %[[ALLOC0]] // CHECK: scf.yield %[[IALLOC]] // CHECK: scf.yield %[[ALLOC2]] -// CHECK: linalg.copy(%[[ALLOC1]], %arg4) +// CHECK: test.copy(%[[ALLOC1]], %arg4) // CHECK: dealloc %[[ALLOC0]] // ----- @@ -1279,7 +1135,7 @@ } scf.yield %2 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -1287,16 +1143,19 @@ // CHECK-NEXT: dealloc %[[ALLOC0]] // CHECK-NEXT: %[[ALLOC1:.*]] = alloc() // CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) -// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] = %[[ALLOC1]]) +// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args +// CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]]) // CHECK: %[[ALLOC2:.*]] = alloc() // CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) // CHECK-NEXT: dealloc %[[IALLOC0]] -// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] = %[[ALLOC2]]) +// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args +// CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]]) // CHECK: %[[ALLOC5:.*]] = alloc() // CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) // CHECK-NEXT: dealloc %[[IALLOC1]] -// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] = %[[ALLOC5]]) +// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args +// CHECK-SAME: (%[[IALLOC2:.*]] = %[[ALLOC5]]) // CHECK: %[[ALLOC8:.*]] = alloc() // CHECK-NEXT: dealloc %[[ALLOC8]] // CHECK: %[[ALLOC9:.*]] = scf.if @@ -1327,7 +1186,7 @@ // CHECK-NEXT: dealloc %[[ALLOC3]] // CHECK-NEXT: scf.yield 
%[[ALLOC4]] -// CHECK: linalg.copy(%[[VAL_7]], %arg4) +// CHECK: test.copy(%[[VAL_7]], %arg4) // CHECK-NEXT: dealloc %[[VAL_7]] // ----- @@ -1359,7 +1218,7 @@ br ^loopHeader(%inc, %alloc1 : i32, memref) ^exit(%buff3 : memref): - "linalg.copy"(%buff3, %arg3) : (memref, memref) -> () + test.copy(%buff3, %arg3) : (memref, memref) return } @@ -1393,7 +1252,7 @@ ^exit(%buff : memref<2xf32>) ^exit(%buff3 : memref<2xf32>): - "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) return } @@ -1401,7 +1260,11 @@ // ----- -func @assumingOp(%arg0: !shape.witness, %arg2: memref<2xf32>, %arg3: memref<2xf32>) { +// CHECK-LABEL: func @assumingOp( +func @assumingOp( + %arg0: !shape.witness, + %arg2: memref<2xf32>, + %arg3: memref<2xf32>) { // Confirm the alloc will be dealloc'ed in the block. %1 = shape.assuming %arg0 -> memref<2xf32> { %0 = alloc() : memref<2xf32> @@ -1412,28 +1275,22 @@ %2 = alloc() : memref<2xf32> shape.assuming_yield %2 : memref<2xf32> } - "linalg.copy"(%3, %arg3) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-LABEL: func @assumingOp( -// CHECK-SAME: %[[ARG0:.*]]: !shape.witness, -// CHECK-SAME: %[[ARG1:.*]]: memref<2xf32>, -// CHECK-SAME: %[[ARG2:.*]]: memref<2xf32>) { -// CHECK: %[[UNUSED_RESULT:.*]] = shape.assuming %[[ARG0]] -> (memref<2xf32>) { -// CHECK: %[[ALLOC0:.*]] = alloc() : memref<2xf32> -// CHECK: dealloc %[[ALLOC0]] : memref<2xf32> -// CHECK: shape.assuming_yield %[[ARG1]] : memref<2xf32> -// CHECK: } -// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]] -> (memref<2xf32>) { -// CHECK: %[[TMP_ALLOC:.*]] = alloc() : memref<2xf32> -// CHECK: %[[RETURNING_ALLOC:.*]] = alloc() : memref<2xf32> -// CHECK: linalg.copy(%[[TMP_ALLOC]], %[[RETURNING_ALLOC]]) : memref<2xf32>, memref<2xf32> -// CHECK: dealloc %[[TMP_ALLOC]] : memref<2xf32> -// CHECK: shape.assuming_yield %[[RETURNING_ALLOC]] : memref<2xf32> -// CHECK: } -// CHECK: linalg.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]]) : memref<2xf32>, memref<2xf32> -// CHECK: dealloc %[[ASSUMING_RESULT]] : memref<2xf32> -// CHECK: return -// CHECK: } - +// CHECK-SAME: %[[ARG0:.*]]: !shape.witness, +// CHECK-SAME: %[[ARG1:.*]]: {{.*}}, +// CHECK-SAME: %[[ARG2:.*]]: {{.*}} +// CHECK: %[[UNUSED_RESULT:.*]] = shape.assuming %[[ARG0]] +// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: shape.assuming_yield %[[ARG1]] +// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]] +// CHECK-NEXT: %[[TMP_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[TMP_ALLOC]], %[[RETURNING_ALLOC]]) +// CHECK-NEXT: dealloc %[[TMP_ALLOC]] +// CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]] +// CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]]) +// CHECK-NEXT: dealloc %[[ASSUMING_RESULT]] diff --git a/mlir/test/Transforms/buffer-hoisting.mlir b/mlir/test/Transforms/buffer-hoisting.mlir --- a/mlir/test/Transforms/buffer-hoisting.mlir +++ b/mlir/test/Transforms/buffer-hoisting.mlir @@ -12,8 +12,6 @@ // BufferHoisting expected behavior: It should move the existing AllocOp to // the entry block. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranch func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2 @@ -21,16 +19,14 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -49,8 +45,6 @@ // to any other block since the alloc has a dynamic dependency to block argument // %0 in bb2. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranchDynamicType func @condBranchDynamicType( %arg0: i1, @@ -62,16 +56,14 @@ br ^bb3(%arg1 : memref) ^bb2(%0: index): %1 = alloc(%0) : memref - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref) - outs(%1: memref) { + test.buffer_based in(%arg1: memref) out(%1: memref) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%1 : memref) ^bb3(%2: memref): - "linalg.copy"(%2, %arg2) : (memref, memref) -> () + test.copy(%2, %arg2) : (memref, memref) return } @@ -79,7 +71,7 @@ // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) // CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -99,8 +91,6 @@ // to any other block since the alloc has a dynamic dependency to block argument // %0 in bb2. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranchDynamicTypeNested func @condBranchDynamicTypeNested( %arg0: i1, @@ -112,12 +102,10 @@ br ^bb6(%arg1 : memref) ^bb2(%0: index): %1 = alloc(%0) : memref - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref) - outs(%1: memref) { + test.buffer_based in(%arg1: memref) out(%1: memref) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb3, ^bb4 ^bb3: @@ -129,7 +117,7 @@ ^bb6(%3: memref): br ^bb7(%3 : memref) ^bb7(%4: memref): - "linalg.copy"(%4, %arg2) : (memref, memref) -> () + test.copy(%4, %arg2) : (memref, memref) return } @@ -137,7 +125,7 @@ // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) // CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -150,23 +138,19 @@ // BufferHoisting expected behavior: It should move the existing AllocOp to // the entry block. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @criticalEdge func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -183,17 +167,13 @@ // bb3 <- Initial position of the second AllocOp // BufferHoisting expected behavior: It shouldn't move the AllocOps. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -204,25 +184,23 @@ br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): %7 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%7: memref<2xf32>) - outs(%7: memref<2xf32>) { + test.buffer_based in(%7: memref<2xf32>) out(%7: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: br ^bb3 // CHECK: br ^bb3 // CHECK-NEXT: ^bb3 // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic -// CHECK: linalg.copy(%[[ALLOC1]] +// CHECK-NEXT: test.buffer_based +// CHECK: test.copy(%[[ALLOC1]] // CHECK-NEXT: return // ----- @@ -235,17 +213,13 @@ // bb3 // BufferHoisting expected behavior: It shouldn't move the AllocOp. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -255,12 +229,12 @@ ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - "linalg.copy"(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -275,17 +249,13 @@ // bb5 <- Initial position of the second AllocOp // BufferHoisting expected behavior: AllocOps shouldn't be moved. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -300,58 +270,50 @@ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): %9 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%7: memref<2xf32>) - outs(%9: memref<2xf32>) { + test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: br ^bb5 // CHECK: br ^bb5 // CHECK: br ^bb5 // CHECK-NEXT: ^bb5 // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- // Test Case: Dead operations in a single block. // BufferHoisting expected behavior: It shouldn't move the AllocOps. 
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } %1 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -364,8 +326,6 @@ // BufferHoisting expected behavior: Both AllocOps should be moved to the // entry block. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc func @moving_alloc_and_inserting_missing_dealloc( %cond: i1, @@ -374,26 +334,22 @@ cond_br %cond, ^bb1, ^bb2 ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^exit(%0 : memref<2xf32>) ^bb2: %1 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): - "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } @@ -413,8 +369,6 @@ // BufferHoisting expected behavior: It should move the AllocOp to the entry // block. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @moving_invalid_dealloc_op_complex func @moving_invalid_dealloc_op_complex( %cond: i1, @@ -425,17 +379,15 @@ br ^exit(%arg0 : memref<2xf32>) ^bb2: %1 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } dealloc %1 : memref<2xf32> br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): - "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } @@ -444,13 +396,11 @@ // ----- -// Test Case: Nested regions - This test defines a GenericOp inside the region -// of another GenericOp. 
-// BufferHoisting expected behavior: The AllocOp of inner GenericOp should -// remain inside the region of outer GenericOp. The AllocOp of the outer -// GenericOp should be moved to the entry block. - -#map0 = affine_map<(d0) -> (d0)> +// Test Case: Nested regions - This test defines a BufferBasedOp inside the +// region of another BufferBasedOp. +// BufferHoisting expected behavior: The AllocOp of inner BufferBasedOp should +// remain inside the region of outer BufferBasedOp. The AllocOp of the outer +// BufferBasedOp should be moved to the entry block. // CHECK-LABEL: func @nested_regions_and_cond_branch func @nested_regions_and_cond_branch( @@ -462,35 +412,27 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %1 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() // CHECK-NEXT: cond_br -// CHECK: linalg.generic +// CHECK: test.buffer_based // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -630,8 +572,6 @@ // ----- -#map0 = affine_map<(d0) -> (d0)> - // Test Case: Alloca operations shouldn't be moved. // CHECK-LABEL: func @condBranchAlloca @@ -641,16 +581,14 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloca() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -658,12 +596,10 @@ // CHECK: ^bb2 // CHECK: ^bb2 // CHECK-NEXT: %[[ALLOCA:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- -#map0 = affine_map<(d0) -> (d0)> - // Test Case: Alloca operations shouldn't be moved. The alloc operation also // shouldn't be moved analogously to the ifElseNested test. 
@@ -673,12 +609,10 @@ %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloca() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -693,30 +627,26 @@ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): %9 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%7: memref<2xf32>) - outs(%9: memref<2xf32>) { + test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOCA:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: ^bb5 // CHECK: ^bb5 // CHECK: ^bb5 // CHECK-NEXT: ^bb5 // CHECK-NEXT: %[[ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- -#map0 = affine_map<(d0) -> (d0)> - // Test Case: Alloca operations shouldn't be moved. The alloc operation should // be moved in the beginning analogous to the nestedRegionsAndCondBranch test. @@ -730,35 +660,27 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %1 = alloca() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOC:.*]] = alloc() // CHECK-NEXT: cond_br -// CHECK: linalg.generic +// CHECK: test.buffer_based // CHECK: %[[ALLOCA:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -779,7 +701,7 @@ %3 = alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -852,7 +774,7 @@ } scf.yield %2 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -893,7 +815,7 @@ } scf.yield %0 : memref } - "linalg.copy"(%1, %res) : (memref, memref) -> () + test.copy(%1, %res) : (memref, memref) return } diff --git a/mlir/test/Transforms/buffer-loop-hoisting.mlir b/mlir/test/Transforms/buffer-loop-hoisting.mlir --- a/mlir/test/Transforms/buffer-loop-hoisting.mlir +++ 
b/mlir/test/Transforms/buffer-loop-hoisting.mlir @@ -11,8 +11,6 @@ // bb3 // BufferLoopHoisting expected behavior: It should not move the AllocOp. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranch func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2 @@ -20,16 +18,14 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -48,8 +44,6 @@ // to any other block since the alloc has a dynamic dependency to block argument // %0 in bb2. -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @condBranchDynamicType func @condBranchDynamicType( %arg0: i1, @@ -61,16 +55,14 @@ br ^bb3(%arg1 : memref) ^bb2(%0: index): %1 = alloc(%0) : memref - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref) - outs(%1: memref) { + test.buffer_based in(%arg1: memref) out(%1: memref) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%1 : memref) ^bb3(%2: memref): - "linalg.copy"(%2, %arg2) : (memref, memref) -> () + test.copy(%2, %arg2) : (memref, memref) return } @@ -78,17 +70,15 @@ // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) // CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- -// Test Case: Nested regions - This test defines a GenericOp inside the region -// of another GenericOp. -// BufferLoopHoisting expected behavior: The AllocOp of inner GenericOp should -// remain inside the region of outer GenericOp. The AllocOp of the outer -// GenericOp should not be moved during this pass. - -#map0 = affine_map<(d0) -> (d0)> +// Test Case: Nested regions - This test defines a BufferBasedOp inside the +// region of another BufferBasedOp. +// BufferLoopHoisting expected behavior: The AllocOp of inner BufferBasedOp +// should remain inside the region of outer BufferBasedOp. The AllocOp of the +// outer BufferBasedOp should not be moved during this pass. 
// CHECK-LABEL: func @nested_regions_and_cond_branch func @nested_regions_and_cond_branch( @@ -100,35 +90,27 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %1 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: cond_br // CHECK: %[[ALLOC0:.*]] = alloc() -// CHECK: linalg.generic +// CHECK: test.buffer_based // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // ----- @@ -175,7 +157,7 @@ %3 = alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -251,7 +233,7 @@ } scf.yield %2 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -297,7 +279,7 @@ } scf.yield %0 : memref } - "linalg.copy"(%1, %res) : (memref, memref) -> () + test.copy(%1, %res) : (memref, memref) return } @@ -323,7 +305,7 @@ %2 = alloc() : memref<2xf32> scf.yield %0 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -345,7 +327,7 @@ %1 = alloc() : memref<2xf32> scf.yield %1 : memref<2xf32> } - "linalg.copy"(%0, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -371,7 +353,7 @@ } scf.yield %0 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -399,7 +381,7 @@ } scf.yield %2 : memref<2xf32> } - "linalg.copy"(%0, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -430,7 +412,7 @@ { scf.yield %0 : memref<2xf32> } - "linalg.copy"(%2, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%2, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -453,7 +435,7 @@ %2 = alloc(%i) : memref scf.yield %0 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -480,7 +462,7 @@ } scf.yield %0 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } diff --git a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir --- a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir @@ -1,9 +1,9 @@ 
// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s -// Since allowMemrefEscaping is on for Buffer Placement in this test pass, all -// tensor typed function results are converted to memref and remain as function -// results. All memref typed function results will escape from the deallocation -// phase of Buffer Placement. +// Since allowMemrefEscaping is active for Buffer Placement in this test pass, +// all tensor typed function results are converted to memref and remain as +// function results. All memref typed function results will escape from the +// deallocation phase of Buffer Placement. // CHECK-LABEL: func @void_function_signature_conversion func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) { @@ -13,26 +13,32 @@ // ----- -#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @complex_signature_conversion -func @complex_signature_conversion(%arg0: tensor<5xf32>, %arg1: memref<10xf32>, %arg2: i1, %arg3: f16) -> (i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16) { +func @complex_signature_conversion( + %arg0: tensor<5xf32>, + %arg1: memref<10xf32>, + %arg2: i1, %arg3: f16) -> ( + i1, + tensor<5xf32>, + memref<10xf32>, + memref<15xf32>, + f16) { %0 = alloc() : memref<15xf32> - %1 = linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg0 : tensor<5xf32>) { + %1 = test.tensor_based in(%arg0 : tensor<5xf32>) { ^bb0(%gen1_arg0: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } -> tensor<5xf32> - return %arg2, %1, %arg1, %0, %arg3 : i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16 + return %arg2, %1, %arg1, %0, %arg3 : + i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16 } -// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16) +// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, +// CHECK-SAME: %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16) // CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16) // CHECK: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK: %[[LINALG_ALLOC:.*]] = alloc() -// CHECK: return %[[ARG2]], %[[LINALG_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]], %[[ARG3]] +// CHECK: %[[TENSOR_ALLOC:.*]] = alloc() +// CHECK: return %[[ARG2]], %[[TENSOR_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]], +// CHECK-SAME: %[[ARG3]] // ----- @@ -111,12 +117,13 @@ // ----- -// Test case: Testing BufferAssignmentCallOpConverter to see if it matches with the -// signature of the new signature of the callee function when there are tuple typed -// args and results. BufferAssignmentTypeConverter is set to flatten tuple typed +// Test case: Testing BufferizeCallOpConverter to see if it matches with the +// signature of the new signature of the callee function when there are tuple +// typed args and results. BufferizeTypeConverter is set to flatten tuple typed // arguments. The tuple typed values should be decomposed and composed using // get_tuple_element and make_tuple operations of test dialect. Tensor types are -// converted to Memref. Memref typed function results remain as function results. +// converted to Memref. Memref typed function results remain as function +// results. 
// CHECK-LABEL: func @callee func @callee(%arg0: tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>){ @@ -158,10 +165,10 @@ // ----- -// Test case: Testing BufferAssignmentFuncOpConverter and -// BufferAssignmentReturnOpConverter to see if the return operation matches with -// the new function signature when there are tuple typed args and results. -// BufferAssignmentTypeConverter is set to flatten tuple typed arguments. The tuple +// Test case: Testing BufferizeFuncOpConverter and +// BufferizeReturnOpConverter to see if the return operation matches with the +// new function signature when there are tuple typed args and results. +// BufferizeTypeConverter is set to flatten tuple typed arguments. The tuple // typed values should be decomposed and composed using get_tuple_element and // make_tuple operations of test dialect. Tensor types are converted to Memref. // Memref typed function results remain as function results. diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir b/mlir/test/Transforms/buffer-placement-preparation.mlir --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -8,28 +8,27 @@ // ----- -// Only tensor typed function result should be converted to memref and move to the -// function arguments list. The other memref function results remain as function -// results. - -#map0 = affine_map<(d0) -> (d0)> +// Only tensor typed function result should be converted to memref and move to +// the function arguments list. The other memref function results remain as +// function results. // CHECK-LABEL: func @memref_in_function_results -func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>) -> (tensor<5xf32>, memref<10xf32>, memref<15xf32>) { +func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>) + -> (tensor<5xf32>, memref<10xf32>, memref<15xf32>) { %0 = alloc() : memref<15xf32> - %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<5xf32>) { + %1 = test.tensor_based in(%arg0 : tensor<5xf32>) { ^bb0(%gen1_arg0: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } -> tensor<5xf32> return %1, %arg1, %0 : tensor<5xf32>, memref<10xf32>, memref<15xf32> } -// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, %[[RESULT:.*]]: memref<5xf32>) +// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, +// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>) // CHECK-SAME: (memref<10xf32>, memref<15xf32>) // CHECK: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK: %[[LINALG_ALLOC:.*]] = alloc() -// CHECK: linalg.copy(%[[LINALG_ALLOC]], %[[RESULT]]) +// CHECK: %[[TENSOR_ALLOC:.*]] = alloc() +// CHECK: test.copy(%[[TENSOR_ALLOC]], %[[RESULT]]) // CHECK: return %[[ARG1]], %[[FIRST_ALLOC]] // ----- @@ -52,29 +51,38 @@ // ----- // CHECK-LABEL: func @complex_signature_conversion -func @complex_signature_conversion(%arg0: tensor<4x8xf32>, %arg1: i1, %arg2: tensor<5x5xf64>,%arg3: f16) -> (i1, tensor<5x5xf64>, f16, tensor<4x8xf32>) { - return %arg1, %arg2, %arg3, %arg0 : i1, tensor<5x5xf64>, f16, tensor<4x8xf32> +func @complex_signature_conversion(%arg0: tensor<4x8xf32>, %arg1: i1, + %arg2: tensor<5x5xf64>,%arg3: f16) -> + (i1, tensor<5x5xf64>, f16, tensor<4x8xf32>) { + return %arg1, %arg2, %arg3, %arg0 : i1, tensor<5x5xf64>, f16, + tensor<4x8xf32> } -// CHECK: (%[[ARG0:.*]]: memref<4x8xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5x5xf64>, %[[ARG3:.*]]: f16, 
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5x5xf64>, %[[RESULT2:.*]]: memref<4x8xf32>) -> (i1, f16) { -// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]]) -// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT2]]) +// CHECK: (%[[ARG0:.*]]: memref<4x8xf32>, %[[ARG1:.*]]: i1 +// CHECK-SAME: %[[ARG2:.*]]: memref<5x5xf64>, %[[ARG3:.*]]: f16 +// CHECK-SAME: %[[RESULT1:.*]]: memref<5x5xf64> +// CHECK-SAME: %[[RESULT2:.*]]: memref<4x8xf32>) -> (i1, f16) { +// CHECK-NEXT: test.copy(%[[ARG2]], %[[RESULT1]]) +// CHECK-NEXT: test.copy(%[[ARG0]], %[[RESULT2]]) // CHECK-NEXT: return %[[ARG1]], %[[ARG3]] // ----- // CHECK-LABEL: func @non_void_to_void_return_op_converter -func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> { +func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>) + -> tensor<4x8xf32> { return %arg0 : tensor<4x8xf32> } -// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>, %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) { -// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT]]) +// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>, +// CHECK-SAME: %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) { +// CHECK-NEXT: test.copy(%[[ARG0]], %[[RESULT]]) // CHECK-NEXT: return // ----- // CHECK-LABEL: func @func_and_block_signature_conversion -func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{ +func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, + %arg1: tensor<4x4xf32>) + -> tensor<4x4xf32>{ cond_br %cond, ^bb1, ^bb2 ^bb1: br ^exit(%arg0 : tensor<2xf32>) @@ -83,129 +91,120 @@ ^exit(%arg2: tensor<2xf32>): return %arg1 : tensor<4x4xf32> } -// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]], %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) { +// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, +// CHECK-SAME: %[[ARG1:.*]]: [[ARG1_TYPE:.*]], +// CHECK-SAME: %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) { // CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]]) // CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]]) // CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]]) -// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT]]) +// CHECK-NEXT: test.copy(%[[ARG1]], %[[RESULT]]) // CHECK-NEXT: return // ----- -// Test Case: Simple case for checking if BufferizePlacer creates AllocOps right before GenericOps. - -#map0 = affine_map<(d0) -> (d0)> +// Test Case: Simple case for checking if BufferizePlacer creates AllocOps +// right before TensorBasedOp. 
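As a hedged illustration of the placement described above (value names such as %buf0 and %arg0 are mine, not the FileCheck captures), the pass is expected to emit each output allocation immediately in front of the converted op that writes it:

  %buf0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%buf0: memref<2xf32>) {
  ^bb0(%in: f32):
    %e = exp %in : f32
    test.buffer_tensor_yield %e : f32
  }
  // the alloc feeding the second converted op appears right before it as well
  %buf1 = alloc() : memref<2xf32>
  test.buffer_based in(%buf0: memref<2xf32>) out(%buf1: memref<2xf32>) {
  ^bb0(%in: f32):
    %e = exp %in : f32
    test.buffer_tensor_yield %e : f32
  }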
// CHECK-LABEL: func @compute_allocs_position_simple -func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{ - %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<2xf32>) { +func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>) + -> tensor<2xf32>{ + %0 = test.tensor_based in(%arg0 : tensor<2xf32>) { ^bb0(%gen1_arg0: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } -> tensor<2xf32> - %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%0 : tensor<2xf32>) { + %1 = test.tensor_based in(%0 : tensor<2xf32>) { ^bb0(%gen2_arg0: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } -> tensor<2xf32> return %1 : tensor<2xf32> } // CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>, // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[FIRST_ALLOC]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[FIRST_ALLOC]] // CHECK: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[FIRST_ALLOC]]{{.*}} outs(%[[SECOND_ALLOC]] +// CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}} out(%[[SECOND_ALLOC]] // ----- -// Test Case: if-else case for checking if BufferizePlacer creates AllocOps right before GenericOps. - -#map0 = affine_map<(d0) -> (d0)> +// Test Case: if-else case for checking if BufferizePlacer creates AllocOps +// right before TensorBasedOp. // CHECK-LABEL: func @compute_allocs_position func @compute_allocs_position(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{ - %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<2xf32>) { + %0 = test.tensor_based in(%arg0 : tensor<2xf32>) { ^bb0(%gen1_arg0: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } -> tensor<2xf32> - %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%0 : tensor<2xf32>) { + %1 = test.tensor_based in(%0 : tensor<2xf32>) { ^bb0(%gen2_arg0: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } -> tensor<2xf32> cond_br %cond, ^bb1(%arg0, %0: tensor<2xf32>, tensor<2xf32>), ^bb2(%0, %arg0: tensor<2xf32>, tensor<2xf32>) ^bb1(%arg1 : tensor<2xf32>, %arg2 : tensor<2xf32>): - %2 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<2xf32>) { + %2 = test.tensor_based in(%arg0 : tensor<2xf32>) { ^bb0(%gen3_arg0: f32): %tmp3 = exp %gen3_arg0 : f32 - linalg.yield %tmp3 : f32 + test.buffer_tensor_yield %tmp3 : f32 } -> tensor<2xf32> - %3 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%2 : tensor<2xf32>) { + %3 = test.tensor_based in(%2 : tensor<2xf32>) { ^bb0(%gen4_arg0: f32): %tmp4 = exp %gen4_arg0 : f32 - linalg.yield %tmp4 : f32 + test.buffer_tensor_yield %tmp4 : f32 } -> tensor<2xf32> br ^exit(%arg1, %arg2 : tensor<2xf32>, tensor<2xf32>) ^bb2(%arg3 : tensor<2xf32>, %arg4 : tensor<2xf32>): - %4 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<2xf32>) { + %4 = test.tensor_based in(%arg0 : tensor<2xf32>) { ^bb0(%gen5_arg0: f32): %tmp5 = exp %gen5_arg0 : f32 - linalg.yield %tmp5 : f32 + test.buffer_tensor_yield %tmp5 : f32 } -> tensor<2xf32> - %5 = linalg.generic {indexing_maps = [#map0, 
#map0], iterator_types = ["parallel"]} - ins(%4 : tensor<2xf32>) { + %5 = test.tensor_based in(%4 : tensor<2xf32>) { ^bb0(%gen6_arg0: f32): %tmp6 = exp %gen6_arg0 : f32 - linalg.yield %tmp6 : f32 + test.buffer_tensor_yield %tmp6 : f32 } -> tensor<2xf32> br ^exit(%arg3, %arg4 : tensor<2xf32>, tensor<2xf32>) ^exit(%arg5 : tensor<2xf32>, %arg6 : tensor<2xf32>): - %6 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<2xf32>) { + %6 = test.tensor_based in(%arg0 : tensor<2xf32>) { ^bb0(%gen7_arg0: f32): %tmp7 = exp %gen7_arg0 : f32 - linalg.yield %tmp7 : f32 + test.buffer_tensor_yield %tmp7 : f32 } -> tensor<2xf32> - %7 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%6 : tensor<2xf32>) { + %7 = test.tensor_based in(%6 : tensor<2xf32>) { ^bb0(%gen8_arg0: f32): %tmp8 = exp %gen8_arg0 : f32 - linalg.yield %tmp8 : f32 + test.buffer_tensor_yield %tmp8 : f32 } -> tensor<2xf32> return %7 : tensor<2xf32> } // CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>, // CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC0]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC0]] // CHECK: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC0]]{{.*}} outs(%[[ALLOC1]] +// CHECK-NEXT: test.buffer_based in(%[[ALLOC0]]{{.*}} out(%[[ALLOC1]] // CHECK: cond_br %{{.*}}, ^[[BB0:.*]]({{.*}}), ^[[BB1:.*]]( // CHECK-NEXT: ^[[BB0]] // CHECK-NEXT: %[[ALLOC2:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC2]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC2]] // CHECK: %[[ALLOC3:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC2]]{{.*}} outs(%[[ALLOC3]] +// CHECK-NEXT: test.buffer_based in(%[[ALLOC2]]{{.*}} out(%[[ALLOC3]] // CHECK: br ^[[EXIT:.*]]({{.*}}) // CHECK-NEXT: ^[[BB1]] // CHECK-NEXT: %[[ALLOC4:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC4]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC4]] // CHECK: %[[ALLOC5:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC4]]{{.*}} outs(%[[ALLOC5]] +// CHECK-NEXT: test.buffer_based in(%[[ALLOC4]]{{.*}} out(%[[ALLOC5]] // CHECK: br ^[[EXIT]] // CHECK-NEXT: ^[[EXIT]] // CHECK-NEXT: %[[ALLOC6:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC6]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC6]] // CHECK: %[[ALLOC7:.*]] = alloc() -// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC6]]{{.*}} outs(%[[ALLOC7]] +// CHECK-NEXT: test.buffer_based in(%[[ALLOC6]]{{.*}} out(%[[ALLOC7]] // ----- @@ -218,22 +217,20 @@ // The operands and results of caller and return operations must be matched // respectively. 
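As a rough sketch of the call-site rewrite being tested (buffer names such as %x_buf and %y_buf are illustrative, not taken from the test), the returned tensor becomes a memref operand that the caller allocates and passes in:

  // before bufferization
  %y = call @callee(%x) : (tensor<5xf32>) -> tensor<5xf32>
  // after bufferization (sketch): the caller allocates the result buffer
  %y_buf = alloc() : memref<5xf32>
  call @callee(%x_buf, %y_buf) : (memref<5xf32>, memref<5xf32>) -> ()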
-#map0 = affine_map<(d0) -> (d0)> - // CHECK-LABEL: func @callee func @callee(%arg1: tensor<5xf32>) -> tensor<5xf32> { - %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1 : tensor<5xf32>) { + %0 = test.tensor_based in(%arg1 : tensor<5xf32>) { ^bb0(%gen1_arg0: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } -> tensor<5xf32> return %0 : tensor<5xf32> } -// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, %[[CALLEE_RESULT:.*]]: memref<5xf32>) +// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, +// CHECK-SAME: %[[CALLEE_RESULT:.*]]: memref<5xf32>) // CHECK: %[[ALLOC:.*]] = alloc() -// CHECK: linalg.generic -// CHECK: linalg.copy(%[[ALLOC]], %[[CALLEE_RESULT]]) +// CHECK: test.buffer_based +// CHECK: test.copy(%[[ALLOC]], %[[CALLEE_RESULT]]) // CHECK: return // CHECK-LABEL: func @caller @@ -242,12 +239,13 @@ %y = call @callee(%x) : (tensor<5xf32>) -> tensor<5xf32> return %y : tensor<5xf32> } -// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, %[[CALLER_RESULT:.*]]: memref<5xf32>) +// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, +// CHECK-SAME: %[[CALLER_RESULT:.*]]: memref<5xf32>) // CHECK: %[[FIRST_ALLOC:.*]] = alloc() // CHECK: call @callee(%[[CALLER_ARG]], %[[FIRST_ALLOC]]) // CHECK: %[[SECOND_ALLOC:.*]] = alloc() // CHECK: call @callee(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) -// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[CALLER_RESULT]]) +// CHECK: test.copy(%[[SECOND_ALLOC]], %[[CALLER_RESULT]]) // CHECK: return // ----- @@ -259,37 +257,39 @@ // func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>)-> memref<2xf32> -// where %arg0 is the input and %arg1 is the output buffer and the original memref -// type result remain as the function result. Then, the rewriter should match the -// caller's signature with the callee. Thus, two buffers will be allocated instead -// of %x0 and %y0 and they are passed to the callers' operands list as the output -// buffers. %x1 and %y1 remain as callers' results. - +// where %arg0 is the input and %arg1 is the output buffer and the original +// memref type result remain as the function result. Then, the rewriter should +// match the caller's signature with the callee. Thus, two buffers will be +// allocated instead of %x0 and %y0 and they are passed to the callers' operands +// list as the output buffers. %x1 and %y1 remain as callers' results. 
// CHECK-LABEL: func @callee func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) { %buff = alloc() : memref<2xf32> return %arg1, %buff : tensor<5xf32>, memref<2xf32> } -// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, %[[CALLEE_RESULT:.*]]: memref<5xf32>) +// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, +// CHECK-SAME: %[[CALLEE_RESULT:.*]]: memref<5xf32>) // CHECK-SAME: memref<2xf32> // CHECK: %[[ALLOC:.*]] = alloc() -// CHECK: linalg.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]]) +// CHECK: test.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]]) // CHECK: return %[[ALLOC]] - // CHECK-LABEL: func @caller func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> { - %x0, %x1 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) - %y0, %y1 = call @callee(%x0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) + %x0, %x1 = call @callee(%arg0) : (tensor<5xf32>) + -> (tensor<5xf32>, memref<2xf32>) + %y0, %y1 = call @callee(%x0) : (tensor<5xf32>) + -> (tensor<5xf32>, memref<2xf32>) return %y0 : tensor<5xf32> } -// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, %[[CALLER_RESULT:.*]]: memref<5xf32>) +// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, +// CHECK-SAME: %[[CALLER_RESULT:.*]]: memref<5xf32>) // CHECK: %[[X0:.*]] = alloc() // CHECK: %[[X1:.*]] = call @callee(%[[CALLER_ARG]], %[[X0]]) // CHECK: %[[Y0:.*]] = alloc() // CHECK: %[[Y1:.*]] = call @callee(%[[X0]], %[[Y0]]) -// CHECK: linalg.copy(%[[Y0]], %[[CALLER_RESULT]]) +// CHECK: test.copy(%[[Y0]], %[[CALLER_RESULT]]) // CHECK: return // ----- @@ -302,83 +302,126 @@ // ----- -// Test case: Testing BufferAssignmentCallOpConverter to see if it matches with the -// signature of the new signature of the callee function when there are tuple typed -// args and results. BufferAssignmentTypeConverter is set to flatten tuple typed -// arguments. The tuple typed values should be decomposed and composed using -// get_tuple_element and make_tuple operations of test dialect. Tensor types are -// converted to Memref. Memref typed function results are appended to the function -// arguments list. +// Test case: Testing BufferizeCallOpConverter to see if it matches with the +// new signature of the callee function when there are tuple +// typed args and results. BufferizeTypeConverter is set to flatten tuple +// typed arguments. The tuple typed values should be decomposed and composed +// using get_tuple_element and make_tuple operations of test dialect. Tensor +// types are converted to Memref. Memref typed function results are appended to +// the function arguments list.
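A hedged sketch of the flattening described above (the first tuple element is assumed to be tensor<2xf32>, matching the memref<2xf32> captures in the CHECK lines that follow): the tuple is split into its element types, tensors become memrefs, and the memref-typed results are appended to the argument list so that only the i1 remains a genuine result:

  // before bufferization
  func @callee(%arg0: tuple<tensor<2xf32>, i1, tensor<5xf32>>)
      -> tuple<tensor<2xf32>, i1, tensor<5xf32>>
  // after bufferization (sketch): memref results become trailing arguments
  func @callee(%arg0: memref<2xf32>, %arg1: i1, %arg2: memref<5xf32>,
               %res0: memref<2xf32>, %res1: memref<5xf32>) -> i1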
// CHECK-LABEL: func @callee -func @callee(%arg0: tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>){ +func @callee(%arg0: tuple,i1, tensor<5xf32>>) + -> (tuple,i1, tensor<5xf32>>){ return %arg0 : tuple,i1, tensor<5xf32>> } -// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>) -// CHECK-SAME: i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]]) -// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]]) +// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, +// CHECK-SAME: %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, +// CHECK-SAME: %[[RESULT1:.*]]: memref<5xf32>) -> i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], +// CHECK-SAME: %[[ARG2]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 2 : i32} +// CHECK-NEXT: test.copy(%[[FIRST_ELEM]], %[[RESULT0]]) +// CHECK-NEXT: test.copy(%[[THIRD_ELEM]], %[[RESULT1]]) // CHECK-NEXT: return %[[SECOND_ELEM]] - // CHECK-LABEL: func @caller -func @caller(%arg0: tuple,i1, tensor<5xf32>>) -> tuple,i1, tensor<5xf32>>{ - %x0 = call @callee(%arg0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) - %y0 = call @callee(%x0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) +func @caller(%arg0: tuple,i1, tensor<5xf32>>) + -> tuple,i1, tensor<5xf32>>{ + %x0 = call @callee(%arg0) : (tuple,i1, tensor<5xf32>>) + -> (tuple,i1, tensor<5xf32>>) + %y0 = call @callee(%x0) : (tuple,i1, tensor<5xf32>>) + -> (tuple,i1, tensor<5xf32>>) return %y0 : tuple,i1, tensor<5xf32>> } -// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>) -// CHECK-SAME: i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, +// CHECK-SAME: %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, +// CHECK-SAME: %[[RESULT1:.*]]: memref<5xf32>) -> i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], +// CHECK-SAME: %[[ARG2]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 2 : i32} // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() // CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], 
%[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], +// CHECK-SAME: %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], +// CHECK-SAME: %[[SECOND_ALLOC]]) +// CHECK-SAME: (memref<2xf32>, i1, +// CHECK-SAME: memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], +// CHECK-SAME: %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 2 : i32} // CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() // CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]]) -// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]]) +// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], +// CHECK-SAME: %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], +// CHECK-SAME: %[[SECOND_ALLOC]]) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) +// CHECK-SAME: i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], +// CHECK-SAME: %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) +// CHECK-SAME: {index = 2 : i32} +// CHECK-NEXT: test.copy(%[[FIRST_ELEM]], %[[RESULT0]]) +// CHECK-NEXT: test.copy(%[[THIRD_ELEM]], %[[RESULT1]]) // CHECK-NEXT: return %[[SECOND_ELEM]] // ----- -// Test case: Testing BufferAssignmentFuncOpConverter and -// BufferAssignmentReturnOpConverter to see if the return operation matches with -// the new function signature when there are tuple typed args and results. -// BufferAssignmentTypeConverter is set to flatten tuple typed arguments. The tuple -// typed values should be decomposed and composed using get_tuple_element and -// make_tuple operations of test dialect. Tensor types are converted to Memref. -// Memref typed function results are appended to the function arguments list. 
+// Test case: Testing BufferizeFuncOpConverter and BufferizeReturnOpConverter +// to see if the return operation matches with the new function signature when +// there are tuple typed args and results. BufferizeTypeConverter is set to +// flatten tuple typed arguments. The tuple typed values should be decomposed +// and composed using get_tuple_element and make_tuple operations of test +// dialect. Tensor types are converted to Memref. Memref typed function results +// are appended to the function arguments list. // CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results -func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, %arg1: tensor<10xf32>, %arg2: tuple>) -> (tuple>, tensor<10xf32>, tuple){ - return %arg2, %arg1, %arg0 : tuple>, tensor<10xf32>, tuple +func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, + %arg1: tensor<10xf32>, + %arg2: tuple>) + -> (tuple>, + tensor<10xf32>, + tuple){ + return %arg2, %arg1, %arg0 : tuple>, tensor<10xf32>, + tuple } -// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>, %[[RESULT1:.*]]: memref<10xf32> +// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, +// CHECK-SAME: %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, +// CHECK-SAME: %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>, +// CHECK-SAME: %[[RESULT1:.*]]: memref<10xf32> // CHECK-SAME: (i1, i1, f32) // CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]]) // CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]]) -// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]]) -// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]]) -// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]] +// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element" +// CHECK-SAME: (%[[SECOND_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element" +// CHECK-SAME: (%[[SECOND_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element" +// CHECK-SAME: (%[[FIRST_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element" +// CHECK-SAME: (%[[FIRST_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: test.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]]) +// CHECK-NEXT: test.copy(%[[ARG2]], %[[RESULT1]]) +// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], +// CHECK-SAME: %[[FIRST_TUPLE_SECOND_ELEM]] diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir --- a/mlir/test/Transforms/promote-buffers-to-stack.mlir +++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir @@ -21,23 +21,21 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based 
in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: cond_br {{.*}} // CHECK: ^bb2 // CHECK-NEXT: %[[ALLOCA:.*]] = alloca() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: return // ----- @@ -64,16 +62,14 @@ br ^bb3(%arg1 : memref) ^bb2(%0: index): %1 = alloc(%0) : memref - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref) - outs(%1: memref) { + test.buffer_based in(%arg1: memref) out(%1: memref) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%1 : memref) ^bb3(%2: memref): - "linalg.copy"(%2, %arg2) : (memref, memref) -> () + test.copy(%2, %arg2) : (memref, memref) return } @@ -81,10 +77,10 @@ // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) // CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: br ^bb3 // CHECK-NEXT: ^bb3(%[[ALLOC0:.*]]:{{.*}}) -// CHECK: linalg.copy(%[[ALLOC0]], +// CHECK: test.copy(%[[ALLOC0]], // CHECK-NEXT: return // ----- @@ -119,23 +115,21 @@ cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: cond_br {{.*}} // CHECK: ^bb1 // CHECK-NEXT: %[[ALLOCA:.*]] = alloca() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: return // ----- @@ -153,24 +147,22 @@ // CHECK-LABEL: func @invCriticalEdge func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOCA:.*]] = alloca() // CHECK: cond_br -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: return // ----- @@ -188,12 +180,10 @@ // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + 
test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -204,22 +194,20 @@ br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): %7 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%5: memref<2xf32>) - outs(%7: memref<2xf32>) { + test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOCA1:.*]] = alloca() -// CHECK: linalg.generic -// CHECK: linalg.copy(%[[ALLOCA1]] +// CHECK: test.buffer_based +// CHECK: test.copy(%[[ALLOCA1]] // CHECK-NEXT: return // ----- @@ -237,12 +225,10 @@ // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -252,7 +238,7 @@ ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - "linalg.copy"(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -278,12 +264,10 @@ // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -298,22 +282,20 @@ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): %9 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%7: memref<2xf32>) - outs(%9: memref<2xf32>) { + test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } - "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } // CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() -// CHECK-NEXT: linalg.generic +// CHECK-NEXT: test.buffer_based // CHECK: %[[ALLOCA1:.*]] = alloca() -// CHECK: linalg.generic -// CHECK: linalg.copy(%[[ALLOCA1]] +// CHECK: test.buffer_based +// CHECK: test.copy(%[[ALLOCA1]] // CHECK-NEXT: return // ----- @@ -327,29 +309,25 @@ // CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], 
iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } %1 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } return } // CHECK: (%[[ARG0:.*]]: {{.*}}) // CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() -// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOCA0]] +// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOCA0]] // CHECK: %[[ALLOCA1:.*]] = alloca() -// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ALLOCA0]]{{.*}} outs(%[[ALLOCA1]] +// CHECK-NEXT: test.buffer_based in(%[[ALLOCA0]]{{.*}} out(%[[ALLOCA1]] // CHECK: return // ----- @@ -373,26 +351,22 @@ cond_br %cond, ^bb1, ^bb2 ^bb1: %0 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^exit(%0 : memref<2xf32>) ^bb2: %1 = alloc() : memref<2xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): - "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } @@ -401,13 +375,13 @@ // CHECK-NEXT: %{{.*}} = alloca() // CHECK: ^bb2 // CHECK-NEXT: %{{.*}} = alloca() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK-NEXT: return // ----- -// Test Case: Nested regions - This test defines a GenericOp inside the region -// of another GenericOp. +// Test Case: Nested regions - This test defines a BufferBasedOp inside the region +// of another BufferBasedOp. // PromoteBuffersToStack expected behavior: The AllocOps are converted into // allocas. 
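A minimal before/after sketch of the promotion this expects, assuming the 2xf32 buffers stay under the pass's size threshold (the large_buffer_allocation test further below shows the opposite case):

  // before promotion
  %0 = alloc() : memref<2xf32>
  // after promotion: small, non-escaping buffers move to the stack
  %0 = alloca() : memref<2xf32>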
@@ -423,28 +397,20 @@ br ^bb3(%arg1 : memref<2xf32>) ^bb2: %0 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%0: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): %1 = alloc() : memref<2xf32> - linalg.generic { - indexing_maps = [#map0, #map0], - iterator_types = ["parallel"]} - ins(%arg1: memref<2xf32>) - outs(%1: memref<2xf32>) { + test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) { ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): %tmp2 = exp %gen2_arg0 : f32 - linalg.yield %tmp2 : f32 + test.buffer_tensor_yield %tmp2 : f32 } %tmp1 = exp %gen1_arg0 : f32 - linalg.yield %tmp1 : f32 + test.buffer_tensor_yield %tmp1 : f32 } br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } @@ -470,21 +436,19 @@ %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) { %x = alloc() : memref<15xf32> %y = alloc() : memref<5xf32> - linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0: memref<5xf32>) - outs(%y: memref<5xf32>) { + test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>) { ^bb0(%arg3: f32, %arg4: f32): %2 = exp %arg3 : f32 - linalg.yield %2 : f32 + test.buffer_tensor_yield %2 : f32 } - linalg.copy(%y, %arg2) : memref<5xf32>, memref<5xf32> + test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>) return %arg1, %x : memref<10xf32>, memref<15xf32> } // CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, // CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>) // CHECK: %[[ALLOC:.*]] = alloc() // CHECK: %[[ALLOCA:.*]] = alloca() -// CHECK: linalg.copy +// CHECK: test.copy // CHECK: return %[[ARG1]], %[[ALLOC]] // ----- @@ -566,7 +530,7 @@ %3 = alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -600,7 +564,7 @@ } scf.yield %3 : memref<2xf32> } - "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } @@ -610,7 +574,7 @@ // CHECK: scf.yield %[[ALLOCA0]] // CHECK: scf.yield %[[IALLOCA]] // CHECK: scf.yield %[[ALLOCA2]] -// CHECK: linalg.copy(%[[ALLOCA1]], %arg4) +// CHECK: test.copy(%[[ALLOCA1]], %arg4) // ----- @@ -656,9 +620,9 @@ // CHECK-LABEL: func @large_buffer_allocation func @large_buffer_allocation(%arg0: memref<2048xf32>) { %0 = alloc() : memref<2048xf32> - "linalg.copy"(%0, %arg0) : (memref<2048xf32>, memref<2048xf32>) -> () + test.copy(%0, %arg0) : (memref<2048xf32>, memref<2048xf32>) return } // CHECK-NEXT: %[[ALLOC:.*]] = alloc() -// CHECK-NEXT: linalg.copy +// CHECK-NEXT: test.copy diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h --- a/mlir/test/lib/Dialect/Test/TestDialect.h +++ b/mlir/test/lib/Dialect/Test/TestDialect.h @@ -24,6 +24,7 @@ #include "mlir/IR/SymbolTable.h" #include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/CopyOpInterface.h" #include "mlir/Interfaces/DerivedAttributeOpInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp --- 
a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -891,4 +891,4 @@ #include "TestTypeInterfaces.cpp.inc" #define GET_OP_CLASSES -#include "TestOps.cpp.inc" +#include "TestOps.cpp.inc" \ No newline at end of file diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -13,9 +13,9 @@ include "mlir/IR/OpAsmInterface.td" include "mlir/IR/RegionKindInterface.td" include "mlir/IR/SymbolInterfaces.td" -include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/CallInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/CopyOpInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -1693,6 +1693,70 @@ let results = (outs AnyType:$result); } +//===----------------------------------------------------------------------===// +// Test CopyOpInterface +//===----------------------------------------------------------------------===// + +def CopyOp : TEST_Op<"copy", [CopyOpInterface]> { + let description = [{ + Represents a copy operation. + }]; + let arguments = (ins Res:$source, + Res:$target); + let assemblyFormat = [{ + `(` $source `,` $target `)` `:` `(` type($source) `,` type($target) `)` + attr-dict + }]; + let extraClassDeclaration = [{ + Value getSource() { return source(); } + Value getTarget() { return target(); } + }]; +} + +//===----------------------------------------------------------------------===// +// Test Buffer/Tensor +//===----------------------------------------------------------------------===// + +def BufferTensorYieldOp : TEST_Op<"buffer_tensor_yield", + [NoSideEffect, ReturnLike, Terminator]> { + let description = [{ + This operation is used in the region of either a BufferBasedOp or + TensorBasedOp. It yields the corresponding type. + }]; + let arguments = (ins AnyType:$result); + let assemblyFormat = [{ + $result `:` type($result) attr-dict + }]; + let builders = [OpBuilderDAG<(ins), + [{ build($_builder, $_state, {}); }]> + ]; +} + +def BufferBasedOp : TEST_Op<"buffer_based", + [SingleBlockImplicitTerminator<"BufferTensorYieldOp">]> { + let description = [{ + A buffer based operation, that uses memRefs as input and output. + }]; + let arguments = (ins AnyMemRef:$input, AnyMemRef:$output); + let regions = (region AnyRegion:$region); + let assemblyFormat = [{attr-dict + `in` `(` $input`:` type($input) `)` `out` `(` $output`:` type($output) `)` + $region}]; +} + +def TensorBasedOp : TEST_Op<"tensor_based", + [SingleBlockImplicitTerminator<"BufferTensorYieldOp">]> { + let description = [{ + A tensor based operation, that uses a tensor as an input and results in a + tensor again. 
+ }]; + let arguments = (ins AnyRankedTensor:$input); + let results = (outs AnyRankedTensor:$result); + let regions = (region AnyRegion:$region); + let assemblyFormat = [{attr-dict `in` `(` $input`:` type($input) `)` + $region `->` type($result)}]; +} + //===----------------------------------------------------------------------===// // Test RegionBranchOpInterface //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp --- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -13,7 +13,7 @@ #include "TestDialect.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Function.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" @@ -24,8 +24,8 @@ namespace { /// This pass tests the computeAllocPosition helper method and bufferize -/// operation converters. Furthermore, this pass converts linalg operations on -/// tensors to linalg operations on buffers to prepare them for the +/// operation converters. Furthermore, this pass converts test operations on +/// tensors to test operations on buffers to prepare them for the /// BufferPlacement pass that can be applied afterwards. /// `allowMemrefFunctionResults` informs the buffer placement to allow functions /// that have memref typed results. Buffer assignment operation converters will @@ -37,97 +37,46 @@ TestBufferPlacementPreparationPass, OperationPass> { - /// Converts tensor-type generic linalg operations to memref ones using + /// Converts tensor based test operations to buffer based ones using /// bufferize. - /// TODO: Avoid the copy-pasta by exposing the pattern from BufferPlacement.h - /// This is limited by not wanting BufferPlacement to depend on Linalg. Fixing - /// this probably requires an OpConversionPattern over generic Operation*. For - /// now only RewritePattern but not ConversionPattern allow this. - - class GenericOpConverter - : public BufferizeOpConversionPattern { + class TensorBasedOpConverter + : public BufferizeOpConversionPattern { public: using BufferizeOpConversionPattern< - linalg::GenericOp>::BufferizeOpConversionPattern; + mlir::TensorBasedOp>::BufferizeOpConversionPattern; LogicalResult - matchAndRewrite(linalg::GenericOp op, ArrayRef operands, + matchAndRewrite(mlir::TensorBasedOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - linalg::GenericOpAdaptor adaptor(operands, - op.getOperation()->getAttrDictionary()); - - // All inputs need to be turned into buffers first. Until then, bail out. - if (llvm::any_of(adaptor.inputs(), [](Value in) { - return !in.getType().isa(); - })) - return failure(); + mlir::TensorBasedOpAdaptor adaptor( + operands, op.getOperation()->getAttrDictionary()); - // All init_tensors need to be turned into buffers first. Until then, bail - // out. - if (llvm::any_of(adaptor.init_tensors(), [](Value in) { - return !in.getType().isa(); - })) + // The input needs to be turned into a buffer first. Until then, bail out. + if (!adaptor.input().getType().isa()) return failure(); Location loc = op.getLoc(); - SmallVector newOutputBuffers; - newOutputBuffers.reserve(op.getNumOutputs()); - newOutputBuffers.append(adaptor.output_buffers().begin(), - adaptor.output_buffers().end()); - // Update all types to memref types. 
- // Assume the init tensors fold onto the first results. - // TODO: update this assumption because the reality is more complex under - // linalg on tensor based transformations. - for (auto en : llvm::enumerate(op.getResultTypes())) { - auto type = en.value().cast(); - if (!type.hasStaticShape()) - return rewriter.notifyMatchFailure( - op, "dynamic shapes not currently supported"); - auto memrefType = - MemRefType::get(type.getShape(), type.getElementType()); - bool foldedInitTensor = en.index() < op.getNumInitTensors(); - if (foldedInitTensor) { - // Dealing with an init tensor requires distinguishing between 1-use - // and many-use cases which would create aliasing and WAR hazards. - Value initTensor = op.getInitTensor(en.index()); - Value initBuffer = adaptor.init_tensors()[en.index()]; - if (initTensor.hasOneUse()) { - newOutputBuffers.push_back(initBuffer); - continue; - } - auto alloc = rewriter.create(loc, memrefType); - rewriter.create(loc, initBuffer, alloc); - newOutputBuffers.push_back(alloc); - } else { - auto alloc = rewriter.create(loc, memrefType); - newOutputBuffers.push_back(alloc); - } - } - - // Generate a new linalg operation that works on buffers. - auto linalgOp = rewriter.create( - loc, - /*resultTensorTypes=*/ArrayRef{}, - /*inputs=*/adaptor.inputs(), - /*outputBuffers=*/newOutputBuffers, - /*initTensors=*/ValueRange{}, op.indexing_maps(), op.iterator_types(), - op.docAttr(), op.library_callAttr(), op.symbol_sourceAttr()); - - // Create a new block in the region of the new Generic Op. + // Update the result type to a memref type. + auto type = op.getResult().getType().cast(); + if (!type.hasStaticShape()) + return rewriter.notifyMatchFailure( + op, "dynamic shapes not currently supported"); + auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); + Value newOutputBuffer = rewriter.create(loc, memrefType); + + // Generate a new test operation that works on buffers. + auto testOp = + rewriter.create(loc, + /*input=*/adaptor.input(), + /*output=*/newOutputBuffer); + + // Create a new block in the region of the new BufferBasedOp. Block &oldBlock = op.getRegion().front(); - Region &newRegion = linalgOp.region(); + Region &newRegion = testOp.region(); Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), oldBlock.getArgumentTypes()); - // Add the result arguments that do not come from init_tensors to the new - // block. - // TODO: update this assumption because the reality is more complex under - // linalg on tensor based transformations. - for (Value v : ValueRange(newOutputBuffers) - .drop_front(adaptor.init_tensors().size())) - newBlock->addArgument(v.getType().cast().getElementType()); - // Clone the body of the old block to the new block. BlockAndValueMapping mapping; for (unsigned i = 0; i < oldBlock.getNumArguments(); i++) @@ -141,23 +90,22 @@ } // Replace the results of the old op with the new output buffers. 
- rewriter.replaceOp(op, newOutputBuffers); + rewriter.replaceOp(op, newOutputBuffer); return success(); } }; - void populateTensorLinalgToBufferLinalgConversionPattern( + void populateTestTensorToBufferConversionPattern( MLIRContext *context, BufferizeTypeConverter &converter, OwningRewritePatternList &patterns) { populateWithBufferizeOpConversionPatterns( - context, converter, patterns); - patterns.insert(context, converter); + mlir::CopyOp>(context, converter, + patterns); + patterns.insert(context, converter); } void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); - registry.insert(); } void runOnOperation() override { @@ -172,11 +120,11 @@ target.addLegalOp(); target.addLegalOp(); - // Mark all Linalg operations illegal as long as they work on tensors. + // Mark all Test operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { return converter.isLegal(op); }; - target.addDynamicallyLegalDialect(isLegalOperation); + target.addDynamicallyLegalDialect(isLegalOperation); // Mark Standard Return operations illegal as long as one operand is tensor. target.addDynamicallyLegalOp([&](mlir::ReturnOp returnOp) { @@ -230,8 +178,7 @@ }); OwningRewritePatternList patterns; - populateTensorLinalgToBufferLinalgConversionPattern(&context, converter, - patterns); + populateTestTensorToBufferConversionPattern(&context, converter, patterns); if (failed(applyFullConversion(this->getOperation(), target, std::move(patterns)))) this->signalPassFailure(); @@ -255,4 +202,4 @@ "Tests the helper operation converters of buffer placement for allowing " "functions to have memref typed results."); } -} // end namespace mlir +} // end namespace mlir \ No newline at end of file
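To tie the converter above back to the IR it rewrites, here is a hedged before/after sketch for a statically shaped op (value names are illustrative; %arg0_buf stands for the already-bufferized operand, the region body is simply cloned onto a block with the original argument types, and the dynamic-shape case bails out via notifyMatchFailure):

  // before conversion
  %0 = test.tensor_based in(%arg0 : tensor<5xf32>) {
  ^bb0(%in: f32):
    %e = exp %in : f32
    test.buffer_tensor_yield %e : f32
  } -> tensor<5xf32>
  // after conversion (sketch): an alloc is materialized for the result
  %out = alloc() : memref<5xf32>
  test.buffer_based in(%arg0_buf: memref<5xf32>) out(%out: memref<5xf32>) {
  ^bb0(%in: f32):
    %e = exp %in : f32
    test.buffer_tensor_yield %e : f32
  }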