diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -482,11 +482,17 @@
 
   while (!workingSet.empty()) {
     Value value = workingSet.pop_back_val();
-    if (condition(value) || value.isa<BlockArgument>()) {
+    if (condition(value)) {
       result.insert(value);
       continue;
     }
 
+    if (value.isa<BlockArgument>()) {
+      if (alwaysIncludeLeaves)
+        result.insert(value);
+      continue;
+    }
+
     OpResult opResult = value.cast<OpResult>();
     BufferizableOpInterface bufferizableOp =
         options.dynCastBufferizableOp(opResult.getDefiningOp());
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
@@ -220,4 +220,14 @@
   %2 = tensor.insert_slice %1 into %t2[1] [5] [1]
       : tensor<5xf32> into tensor<10xf32>
   return %2 : tensor<10xf32>
-}
\ No newline at end of file
+}
+
+// -----
+
+// Regression test: a tensor.insert_slice whose tensor operands are block arguments must not crash.
+
+// CHECK-LABEL: func.func @regression_insert_of_bbarg(
+func.func @regression_insert_of_bbarg(%t0: tensor<5xf32>, %t1: tensor<10xf32>) -> tensor<10xf32> {
+  %0 = tensor.insert_slice %t0 into %t1 [2] [5] [1] : tensor<5xf32> into tensor<10xf32>
+  return %0 : tensor<10xf32>
+}
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -707,11 +707,11 @@
   scf.for %iv = %a to %b step %c {
     // Must bufferize out-of-place because definition of read is in a different
     // repetitive region.
-    // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["false"]}
+    // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true"]}
     %2 = tensor.extract_slice %t[0][4][1] : tensor<10xf32> to tensor<4xf32>
     %3 = tensor.extract %2[%a] : tensor<4xf32>
     vector.print %3 : f32
-    // CHECK: tensor.insert {{.*}} {__inplace_operands_attr__ = ["none", "true", "none"]}
+    // CHECK: tensor.insert {{.*}} {__inplace_operands_attr__ = ["none", "false", "none"]}
     %4 = tensor.insert %cst into %2[%a] : tensor<4xf32>
     %5 = tensor.extract %4[%a] : tensor<4xf32>
     vector.print %5 : f32