diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -279,6 +279,9 @@
          "expected that op allocates");
 
   AnalysisState analysisState(options);
+  if (analysisState.isTensorYielded(opResult))
+    return false;
+
   if (op->hasAttr(BufferizationDialect::kEscapeAttrName)) {
     // AllocTensorOp has one result.
     ArrayAttr escapeAttr =
@@ -287,12 +290,7 @@
   }
 
   // No "escape" annotation found.
-  if (options.createDeallocs) {
-    // Perform an ad-hoc analysis.
-    return !analysisState.isTensorYielded(opResult);
-  }
-
-  return false;
+  return options.createDeallocs;
 }
 
 //===----------------------------------------------------------------------===//
@@ -611,9 +609,13 @@
   if (isa<ToMemrefOp>(op))
     return true;
 
-  // Check if the op is returning/yielding.
-  if (isRegionReturnLike(op))
-    return true;
+  // Check if the op or any of its enclosing ops is returning/yielding.
+  Operation *parentOp = op;
+  do {
+    if (isRegionReturnLike(parentOp) ||
+        parentOp->hasTrait<OpTrait::IsTerminator>())
+      return true;
+  } while ((parentOp = parentOp->getParentOp()));
 
   // Add all aliasing OpResults to the worklist.
   // Note: In the absence of detailed analysis information (e.g., there may be
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -199,3 +199,26 @@
   %3 = tensor.extract %0[%pos3] : tensor<100xf32>
   return %2, %3 : f32, f32
 }
+
+// -----
+
+// CHECK-LABEL: func @terminator_use_not_deallocated
+#map = affine_map<(d0) -> (d0 * 5)>
+func.func @terminator_use_not_deallocated(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  // CHECK: %[[alloc:.*]] = memref.alloc
+  // CHECK: memref.copy {{.*}} %[[alloc]]
+  // CHECK: scf.forall ({{.*}}) in (2, 2) {
+  %0 = scf.forall (%arg1, %arg2) in (2, 2) shared_outs(%arg3 = %arg0) -> (tensor<10x10xf32>) {
+    // CHECK: %[[local_alloc:.*]] = memref.alloc
+    // CHECK-NOT: memref.dealloc
+    // CHECK: %[[subview:.*]] = memref.subview %[[alloc]]
+    // CHECK: memref.copy %[[local_alloc]], %[[subview]]
+    %1 = bufferization.alloc_tensor() : tensor<5x5xf32>
+    %2 = affine.apply #map(%arg1)
+    %3 = affine.apply #map(%arg2)
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %1 into %arg3[%2, %3] [5, 5] [1, 1] : tensor<5x5xf32> into tensor<10x10xf32>
+    }
+  }
+  return %0 : tensor<10x10xf32>
+}
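
The behavioral core of the patch is the new parent walk in isTensorYielded: a use now counts as yielding not only when the using op itself is return-like, but also when any enclosing op is return-like or a terminator (e.g. tensor.parallel_insert_slice nested inside the scf.forall.in_parallel terminator, as exercised by the new test). Below is a minimal standalone C++ sketch of that walk, assuming a toy Op struct whose parent/returnLike/terminator members are hypothetical stand-ins for Operation::getParentOp(), isRegionReturnLike(), and OpTrait::IsTerminator; it is not the MLIR API, just an illustration of the loop's logic.

// Standalone sketch; the Op struct and its fields are hypothetical
// stand-ins for mlir::Operation, not the real MLIR API.
#include <cassert>

struct Op {
  Op *parent = nullptr;     // stand-in for Operation::getParentOp()
  bool returnLike = false;  // stand-in for isRegionReturnLike(op)
  bool terminator = false;  // stand-in for op->hasTrait<OpTrait::IsTerminator>()
};

// Mirrors the do/while loop in the patch: walk from `op` up through its
// ancestors and report whether any of them returns/yields the value.
static bool isRecursivelyReturnLike(Op *op) {
  do {
    if (op->returnLike || op->terminator)
      return true;
  } while ((op = op->parent));
  return false;
}

int main() {
  // Models tensor.parallel_insert_slice nested in scf.forall.in_parallel:
  // the op itself is not return-like, but its parent is a terminator, so
  // the value is treated as yielded (and must not be deallocated locally).
  Op inParallel;            // scf.forall.in_parallel (terminator)
  inParallel.terminator = true;
  Op parallelInsert;        // tensor.parallel_insert_slice
  parallelInsert.parent = &inParallel;
  assert(isRecursivelyReturnLike(&parallelInsert));

  Op plainOp;               // op with no yielding ancestor
  assert(!isRecursivelyReturnLike(&plainOp));
  return 0;
}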