diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -279,6 +279,9 @@
          "expected that op allocates");
 
   AnalysisState analysisState(options);
+  if (analysisState.isTensorYielded(opResult))
+    return false;
+
   if (op->hasAttr(BufferizationDialect::kEscapeAttrName)) {
     // AllocTensorOp has one result.
     ArrayAttr escapeAttr =
@@ -287,12 +290,7 @@
   }
 
   // No "escape" annotation found.
-  if (options.createDeallocs) {
-    // Perform an ad-hoc analysis.
-    return !analysisState.isTensorYielded(opResult);
-  }
-
-  return false;
+  return options.createDeallocs;
 }
 
 //===----------------------------------------------------------------------===//
@@ -611,9 +609,13 @@
   if (isa<ToMemrefOp>(op))
     return true;
 
-  // Check if the op is returning/yielding.
-  if (isRegionReturnLike(op))
-    return true;
+  // Check if the op or any of its enclosing ops is returning/yielding.
+  Operation *parentOp = op;
+  do {
+    if (isRegionReturnLike(parentOp) ||
+        parentOp->hasTrait<OpTrait::IsTerminator>())
+      return true;
+  } while ((parentOp = parentOp->getParentOp()));
 
   // Add all aliasing OpResults to the worklist.
   // Note: In the absence of detailed analysis information (e.g., there may be
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -199,3 +199,26 @@
   %3 = tensor.extract %0[%pos3] : tensor<100xf32>
   return %2, %3 : f32, f32
 }
+
+// -----
+
+// CHECK-LABEL: func @terminator_use_not_deallocated
+#map = affine_map<(d0) -> (d0 * 5)>
+func.func @terminator_use_not_deallocated(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  // CHECK: %[[alloc:.*]] = memref.alloc
+  // CHECK: memref.copy {{.*}} %[[alloc]]
+  // CHECK: scf.forall ({{.*}}) in (2, 2) {
+  %0 = scf.forall (%arg1, %arg2) in (2, 2) shared_outs(%arg3 = %arg0) -> (tensor<10x10xf32>) {
+    // CHECK: %[[local_alloc:.*]] = memref.alloc
+    // CHECK-NOT: memref.dealloc
+    // CHECK: %[[subview:.*]] = memref.subview %[[alloc]]
+    // CHECK: memref.copy %[[local_alloc]], %[[subview]]
+    %1 = bufferization.alloc_tensor() : tensor<5x5xf32>
+    %2 = affine.apply #map(%arg1)
+    %3 = affine.apply #map(%arg2)
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %1 into %arg3[%2, %3] [5, 5] [1, 1] : tensor<5x5xf32> into tensor<10x10xf32>
+    }
+  }
+  return %0 : tensor<10x10xf32>
+}
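
The behavioral core of the patch is the new parent walk in isTensorYielded: a use now counts as yielding not only when the using op itself is return-like, but also when any enclosing op is return-like or a terminator (e.g. tensor.parallel_insert_slice nested inside the scf.forall.in_parallel terminator, as exercised by the new test). Below is a minimal standalone C++ sketch of that walk, assuming a toy Op struct whose parent/returnLike/terminator members are hypothetical stand-ins for Operation::getParentOp(), isRegionReturnLike(), and OpTrait::IsTerminator; it is not the MLIR API, just an illustration of the loop's logic.

// Standalone sketch; the Op struct and its fields are hypothetical
// stand-ins for mlir::Operation, not the real MLIR API.
#include <cassert>

struct Op {
  Op *parent = nullptr;     // stand-in for Operation::getParentOp()
  bool returnLike = false;  // stand-in for isRegionReturnLike(op)
  bool terminator = false;  // stand-in for op->hasTrait<OpTrait::IsTerminator>()
};

// Mirrors the do/while loop in the patch: walk from `op` up through its
// ancestors and report whether any of them returns/yields the value.
static bool isRecursivelyReturnLike(Op *op) {
  do {
    if (op->returnLike || op->terminator)
      return true;
  } while ((op = op->parent));
  return false;
}

int main() {
  // Models tensor.parallel_insert_slice nested in scf.forall.in_parallel:
  // the op itself is not return-like, but its parent is a terminator, so
  // the value is treated as yielded (and must not be deallocated locally).
  Op inParallel;            // scf.forall.in_parallel (terminator)
  inParallel.terminator = true;
  Op parallelInsert;        // tensor.parallel_insert_slice
  parallelInsert.parent = &inParallel;
  assert(isRecursivelyReturnLike(&parallelInsert));

  Op plainOp;               // op with no yielding ancestor
  assert(!isRecursivelyReturnLike(&plainOp));
  return 0;
}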