diff --git a/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -155,10 +155,12 @@
           bufferization::getBufferType(selectOp.getResult(), options);
       if (failed(targetType))
         return failure();
-      trueBuffer =
-          rewriter.create<memref::CastOp>(loc, *targetType, trueBuffer);
-      falseBuffer =
-          rewriter.create<memref::CastOp>(loc, *targetType, falseBuffer);
+      if (trueBuffer.getType() != *targetType)
+        trueBuffer =
+            rewriter.create<memref::CastOp>(loc, *targetType, trueBuffer);
+      if (falseBuffer.getType() != *targetType)
+        falseBuffer =
+            rewriter.create<memref::CastOp>(loc, *targetType, falseBuffer);
     }
 
     replaceOpWithNewBufferizedOp<arith::SelectOp>(
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -269,15 +269,6 @@
     this->numBufferDealloc = statistics.numBufferDealloc;
     this->numTensorInPlace = statistics.numTensorInPlace;
     this->numTensorOutOfPlace = statistics.numTensorOutOfPlace;
-
-    if (opt.testAnalysisOnly)
-      return;
-
-    OpPassManager cleanupPipeline("builtin.module");
-    cleanupPipeline.addPass(createCanonicalizerPass());
-    cleanupPipeline.addPass(createCSEPass());
-    cleanupPipeline.addPass(createLoopInvariantCodeMotionPass());
-    (void)runPipeline(cleanupPipeline, moduleOp);
   }
 
 private:
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -cse -canonicalize -split-input-file | FileCheck %s
 
 // CHECK: func @buffer_forwarding_conflict(
 // CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref
@@ -101,7 +101,6 @@
   %c5 = arith.constant 5 : index
 
   // CHECK-NOT: memref.alloc
-  // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1]
   %blank = tensor.empty() : tensor<5xf32>
 
   // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} {
@@ -109,6 +108,7 @@
     %iv_i32 = arith.index_cast %iv : index to i32
     %f = arith.sitofp %iv_i32 : i32 to f32
 
+    // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1]
     // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]]
     %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32>
 
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
@@ -172,7 +172,7 @@
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> (tensor<?xf32>)
 
   // The result of an unknown op is not writable. Always generate a copy.
-  // CHECK: %[[dim:.*]] = tensor.dim %[[dummy]]
+  // CHECK: %[[dim:.*]] = memref.dim %[[dummy_memref]]
   // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
   // CHECK: memref.copy %[[dummy_memref]], %[[alloc]]
   // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -55,7 +55,7 @@
   %c0 = arith.constant 0 : index
 
   // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]]
-  // CHECK: %[[dim:.*]] = tensor.dim %[[A]]
+  // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]]
   // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
   // CHECK: memref.copy %[[A_memref]], %[[alloc]]
   // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
@@ -202,9 +202,12 @@
 
 // -----
 
-// CHECK-LABEL: func @from_unranked_to_unranked
+// CHECK-LABEL: func @from_unranked_to_unranked(
+// CHECK-SAME: %[[arg0:.*]]: tensor<*xi32>
 func.func @from_unranked_to_unranked(%arg0: tensor<*xi32>) -> tensor<*xi32> {
-  // CHECK: return %arg{{.*}} : tensor<*xi32>
+  // CHECK: %[[m:.*]] = bufferization.to_memref %[[arg0]] : memref<*xi32>
+  // CHECK: %[[t:.*]] = bufferization.to_tensor %[[m]]
+  // CHECK: return %[[t]] : tensor<*xi32>
   %0 = tensor.cast %arg0 : tensor<*xi32> to tensor<*xi32>
   return %0 : tensor<*xi32>
 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -1,5 +1,5 @@
 // Note: Default is function-boundary-type-conversion=infer-layout-map
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -canonicalize -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s
 // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-NO-DEALLOC-PASS
 
 // Run fuzzer with different seeds.
@@ -101,19 +101,18 @@
 
   // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
   // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
-  // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
-  // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
-
   // CHECK: scf.for {{.*}}
   // CHECK-NOT: iter_args
   %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
       -> (tensor<?xf32>, tensor<?xf32>)
   {
     // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA
+    // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
     // CHECK: memref.copy %[[C]], %[[svA]]
    %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
     // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B
+    // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
     // CHECK: memref.copy %[[C]], %[[svB]]
    %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
@@ -545,14 +544,13 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
 
-  // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
   // CHECK: scf.forall (%[[tidx:.*]]) in (%[[idx2]])
   %2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor<?xf32>) {
+      // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
      %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor<?xf32> to tensor<?xf32>
 
      // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref) -> tensor
-      // Self-copy will DCE away later.
-      // CHECK: memref.copy %[[subview]], %[[subview]]
+      // CHECK-NOT: memref.copy
 
      // Empty terminator is elided from pretty-printing.
      // CHECK-NOT: scf.forall.in_parallel
@@ -591,16 +589,14 @@
 
   // CHECK: %[[alloc1:.*]] = memref.alloc
   // CHECK: memref.copy %[[arg2]], %[[alloc1]]
-  // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1]
   // CHECK: scf.forall (%[[tidx:.*]]) in (%[[idx2]])
   %2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor<?xf32>) {
+      // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1]
      %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor<?xf32> to tensor<?xf32>
 
      // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref) -> tensor
-
-      // Now the copy of the actual insert_slice. (It will fold away.)
-      // CHECK: memref.copy %[[subview1]], %[[subview1]]
+      // CHECK-NOT: memref.copy
 
      // Empty terminator is elided from pretty-printing.
      // CHECK-NOT: scf.forall.in_parallel
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -131,8 +131,9 @@
 
 // CHECK-LABEL: func @tensor_cast_not_in_place(
 // CHECK-SAME: %[[A:.*]]: memref, %[[B:.*]]: memref
+// CHECK: %[[casted:.*]] = memref.cast %[[A]] : memref<?xf32, strided<[?], offset: ?>> to memref<4xf32, strided<[?], offset: ?>>
 // CHECK: %[[alloc:.*]] = memref.alloc
-// CHECK: memref.copy %[[A]], %[[alloc]]
+// CHECK: memref.copy %[[casted]], %[[alloc]]
 // CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
 // CHECK: memref.copy %[[alloc]], %[[subview]]
 func.func @tensor_cast_not_in_place(
@@ -326,7 +327,8 @@
   vector.print %vec : vector<10xf32>
 
   // Write back a different value (not %1).
-  // CHECK: memref.copy %[[b]], %[[t]]
+  // CHECK: %[[subview:.*]] = memref.subview %[[t]][0] [10] [1]
+  // CHECK: memref.copy %[[b]], %[[subview]]
   %2 = tensor.insert_slice %b into %t[0][10][1] : tensor<10xf32> into tensor<10xf32>
   return %2 : tensor<10xf32>
 }
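
Note (not part of the patch): a minimal sketch of the IR the arith.select change above is aimed at. The operand names and the memref<?xf32> type are illustrative assumptions only; the point is that when both branch buffers already carry the computed target type, no memref.cast is inserted, so the bufferized output stays clean without relying on the cleanup pipeline that this patch removes from One-Shot Bufferize.

    // Hypothetical tensor-level input:
    //   %sel = arith.select %cond, %t_true, %t_false : tensor<?xf32>
    //
    // Sketch of the bufferized form when both operands map to the same
    // memref<?xf32> buffer type:
    //   %m_true  = bufferization.to_memref %t_true  : memref<?xf32>
    //   %m_false = bufferization.to_memref %t_false : memref<?xf32>
    //   // No memref.cast is emitted; the select is built directly on the buffers.
    //   %sel = arith.select %cond, %m_true, %m_false : memref<?xf32>
    //
    // A memref.cast to the common target type is still emitted for an operand
    // whose buffer type differs, e.g. in its layout map.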