diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
@@ -441,8 +441,17 @@
 
 // TODO: Parallelism and threadlocal considerations with a ParallelScope trait.
 static Operation *getAutomaticAllocationScope(Operation *op) {
-  Operation *scope =
-      op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+  // Find the closest surrounding allocation scope that is not a known looping
+  // construct (putting alloca's in loops doesn't always lower to deallocation
+  // until the end of the loop).
+  Operation *scope = nullptr;
+  for (Operation *parent = op->getParentOp(); parent != nullptr;
+       parent = parent->getParentOp()) {
+    if (parent->hasTrait<OpTrait::AutomaticAllocationScope>())
+      scope = parent;
+    if (!isa<scf::ForOp, AffineForOp>(parent))
+      break;
+  }
   assert(scope && "Expected op to be inside automatic allocation scope");
   return scope;
 }
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -412,3 +412,23 @@
   }
   return %token : !async.token
 }
+
+// -----
+
+func.func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()
+
+// Ensure that `alloca`s are inserted outside of loops even though loops are
+// considered allocation scopes.
+// CHECK-LABEL: transfer_read_within_scf_for
+func.func @transfer_read_within_scf_for(%A : memref<?x?xf32>, %lb : index, %ub : index, %step : index) {
+  %c0 = arith.constant 0 : index
+  %f0 = arith.constant 0.0 : f32
+  // CHECK: alloca
+  // CHECK: scf.for
+  // CHECK-NOT: alloca
+  scf.for %i = %lb to %ub step %step {
+    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
+    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
+  }
+  return
+}