diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp
--- a/mlir/lib/Transforms/BufferPlacement.cpp
+++ b/mlir/lib/Transforms/BufferPlacement.cpp
@@ -48,11 +48,10 @@
 // will be freed in the end.
 //
 // TODO:
-// The current implementation does not support loops and the resulting code will
-// be invalid with respect to program semantics. The only thing that is
-// currently missing is a high-level loop analysis that allows us to move allocs
-// and deallocs outside of the loop blocks. Furthermore, it doesn't also accept
-// functions which return buffers already.
+// The current implementation does not support explicit-control-flow loops and
+// the resulting code will be invalid with respect to program semantics.
+// However, structured control-flow loops are fully supported. Furthermore, it
+// doesn't accept functions which return buffers already.
 //
 //===----------------------------------------------------------------------===//
 
@@ -77,6 +76,22 @@
   }
 }
 
+/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions`
+/// function that initializes the required `operandAttributes` array.
+static void getSuccessorRegions(RegionBranchOpInterface regionInterface,
+                                llvm::Optional<unsigned> index,
+                                SmallVectorImpl<RegionSuccessor> &successors) {
+  // Create a list of null attributes for each operand to comply with the
+  // `getSuccessorRegions` interface definition that requires a single
+  // attribute per operand.
+  SmallVector<Attribute, 2> operandAttributes(
+      regionInterface.getOperation()->getNumOperands());
+
+  // Get all successor regions using the temporarily allocated
+  // `operandAttributes`.
+  regionInterface.getSuccessorRegions(index, operandAttributes, successors);
+}
+
 namespace {
 //===----------------------------------------------------------------------===//
 // BufferPlacementAliasAnalysis
@@ -166,16 +181,10 @@
 
     // Query the RegionBranchOpInterface to find potential successor regions.
     op->walk([&](RegionBranchOpInterface regionInterface) {
-      // Create an empty attribute for each operand to comply with the
-      // `getSuccessorRegions` interface definition that requires a single
-      // attribute per operand.
-      SmallVector<Attribute, 2> operandAttributes(
-          regionInterface.getOperation()->getNumOperands());
-
       // Extract all entry regions and wire all initial entry successor inputs.
       SmallVector<RegionSuccessor, 2> entrySuccessors;
-      regionInterface.getSuccessorRegions(/*index=*/llvm::None,
-                                          operandAttributes, entrySuccessors);
+      getSuccessorRegions(regionInterface, /*index=*/llvm::None,
+                          entrySuccessors);
       for (RegionSuccessor &entrySuccessor : entrySuccessors) {
         // Wire the entry region's successor arguments with the initial
         // successor inputs.
@@ -191,8 +200,8 @@
         // Iterate over all successor region entries that are reachable from the
         // current region.
         SmallVector<RegionSuccessor, 2> successorRegions;
-        regionInterface.getSuccessorRegions(
-            region.getRegionNumber(), operandAttributes, successorRegions);
+        getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                            successorRegions);
        for (RegionSuccessor &successorRegion : successorRegions) {
           // Iterate over all immediate terminator operations and wire the
           // successor inputs with the operands of each terminator.
@@ -209,6 +218,83 @@
   ValueMapT aliases;
 };
 
+//===----------------------------------------------------------------------===//
+// Backedges
+//===----------------------------------------------------------------------===//
+
+/// A straight-forward program analysis which detects loop backedges induced by
+/// explicit control flow.
+class Backedges {
+public:
+  using BlockSetT = SmallPtrSet<Block *, 16>;
+  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
+
+public:
+  /// Constructs a new backedges analysis using the op provided.
+  Backedges(Operation *op) { recurse(op, op->getBlock()); }
+
+  /// Returns the number of backedges formed by explicit control flow.
+  size_t size() const { return edgeSet.size(); }
+
+  /// Returns the start iterator to loop over all backedges.
+  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
+
+  /// Returns the end iterator to loop over all backedges.
+  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
+
+private:
+  /// Enters the current block and inserts a backedge into the `edgeSet` if we
+  /// have already visited the current block. The inserted edge links the given
+  /// `predecessor` with the `current` block.
+  bool enter(Block &current, Block *predecessor) {
+    bool inserted = visited.insert(&current).second;
+    if (!inserted)
+      edgeSet.insert(std::make_pair(predecessor, &current));
+    return inserted;
+  }
+
+  /// Leaves the current block.
+  void exit(Block &current) { visited.erase(&current); }
+
+  /// Recurses into the given operation while taking all attached regions into
+  /// account.
+  void recurse(Operation *op, Block *predecessor) {
+    Block *current = op->getBlock();
+    // If the current op implements the `BranchOpInterface`, there can be
+    // cycles in the scope of all successor blocks.
+    if (isa<BranchOpInterface>(op)) {
+      for (Block *succ : current->getSuccessors())
+        recurse(*succ, current);
+    }
+    // Recurse into all distinct regions and check for explicit control-flow
+    // loops.
+    for (Region &region : op->getRegions())
+      recurse(region.front(), current);
+  }
+
+  /// Recurses into explicit control-flow structures that are given by
+  /// the successor relation defined on the block level.
+  void recurse(Block &block, Block *predecessor) {
+    // Try to enter the current block. If this is not possible, we are
+    // currently processing this block and can safely return here.
+    if (!enter(block, predecessor))
+      return;
+
+    // Recurse into all operations and successor blocks.
+    for (auto &op : block.getOperations())
+      recurse(&op, predecessor);
+
+    // Leave the current block.
+    exit(block);
+  }
+
+  /// Stores all blocks that are currently visited and on the processing stack.
+  BlockSetT visited;
+
+  /// Stores all backedges in the format (source, target).
+  BackedgeSetT edgeSet;
+};
+
 //===----------------------------------------------------------------------===//
 // BufferPlacement
 //===----------------------------------------------------------------------===//
@@ -357,9 +443,14 @@
       for (Value value : it->second) {
         if (valuesToFree.count(value) > 0)
           continue;
-        // Check whether we have to free this particular block argument.
-        if (!dominators.dominates(definingBlock, value.getParentBlock())) {
-          toProcess.emplace_back(value, value.getParentBlock());
+        Block *parentBlock = value.getParentBlock();
+        // Check whether we have to free this particular block argument or
+        // generic value. We have to free the current alias if it is either
+        // defined in a non-dominated block or it is defined in the same block
+        // but the current value is not dominated by the source value.
+        if (!dominators.dominates(definingBlock, parentBlock) ||
+            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
+          toProcess.emplace_back(value, parentBlock);
           valuesToFree.insert(value);
         } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                        .second)
@@ -431,22 +522,42 @@
     // argument belongs to the first block in a region and the parent operation
     // implements the RegionBranchOpInterface.
     Region *argRegion = block->getParent();
+    Operation *parentOp = argRegion->getParentOp();
     RegionBranchOpInterface regionInterface;
     if (!argRegion || &argRegion->front() != block ||
-        !(regionInterface =
-              dyn_cast<RegionBranchOpInterface>(argRegion->getParentOp())))
+        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
       return;
 
     introduceCopiesForRegionSuccessors(
-        regionInterface, argRegion->getParentOp()->getRegions(),
+        regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
         [&](RegionSuccessor &successorRegion) {
           // Find a predecessor of our argRegion.
           return successorRegion.getSuccessor() == argRegion;
-        },
-        [&](RegionSuccessor &successorRegion) {
-          // The operand index will be the argument number.
-          return blockArg.getArgNumber();
         });
+
+    // Check whether the block argument belongs to an entry region of the
+    // parent operation. In this case, we have to introduce an additional copy
+    // for the buffer that is passed to the argument.
+    SmallVector<RegionSuccessor, 2> successorRegions;
+    getSuccessorRegions(regionInterface, llvm::None, successorRegions);
+    auto *it =
+        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
+          return successorRegion.getSuccessor() == argRegion;
+        });
+    if (it == successorRegions.end())
+      return;
+
+    // Determine the actual operand to introduce a copy for and rewire the
+    // operand to point to the copy instead.
+    Value operand =
+        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
+            [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
+    Value copy = introduceBufferCopy(operand, parentOp);
+
+    auto op = llvm::find(parentOp->getOperands(), operand);
+    assert(op != parentOp->getOperands().end() &&
+           "parentOp does not contain operand");
+    parentOp->setOperand(op.getIndex(), copy);
   }
 
   /// Introduces temporary allocs in front of all associated nested-region
@@ -455,42 +566,34 @@
     // Get the actual result index in the scope of the parent terminator.
     Operation *operation = value.getDefiningOp();
     auto regionInterface = cast<RegionBranchOpInterface>(operation);
-    introduceCopiesForRegionSuccessors(
-        regionInterface, operation->getRegions(),
-        [&](RegionSuccessor &successorRegion) {
-          // Determine whether this region has a successor entry that leaves
-          // this region by returning to its parent operation.
-          return !successorRegion.getSuccessor();
-        },
-        [&](RegionSuccessor &successorRegion) {
-          // Find the associated success input index.
-          return llvm::find(successorRegion.getSuccessorInputs(), value)
-              .getIndex();
-        });
+    // Filter successors that return to the parent operation.
+    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
+      // If the RegionSuccessor has no associated successor, it will return to
+      // its parent operation.
+      return !successorRegion.getSuccessor();
+    };
+    // Introduce a copy for all region "results" that are returned to the parent
+    // operation. This is required since the parent's result value has been
+    // considered critical. Therefore, the algorithm assumes that a copy of a
+    // previously allocated buffer is returned by the operation (like in the
+    // case of a block argument).
+    introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(),
+                                       value, regionPredicate);
   }
 
   /// Introduces buffer copies for all terminators in the given regions. The
   /// regionPredicate is applied to every successor region in order to restrict
-  /// the copies to specific regions. Thereby, the operandProvider is invoked
-  /// for each matching region successor and determines the operand index that
-  /// requires a buffer copy.
-  template <typename TPredicate, typename TOperandProvider>
-  void
-  introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface,
-                                     MutableArrayRef<Region> regions,
-                                     const TPredicate &regionPredicate,
-                                     const TOperandProvider &operandProvider) {
-    // Create an empty attribute for each operand to comply with the
-    // `getSuccessorRegions` interface definition that requires a single
-    // attribute per operand.
-    SmallVector<Attribute, 2> operandAttributes(
-        regionInterface.getOperation()->getNumOperands());
+  /// the copies to specific regions.
+  template <typename TPredicate>
+  void introduceCopiesForRegionSuccessors(
+      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
+      Value argValue, const TPredicate &regionPredicate) {
     for (Region &region : regions) {
       // Query the regionInterface to get all successor regions of the current
       // one.
       SmallVector<RegionSuccessor, 2> successorRegions;
-      regionInterface.getSuccessorRegions(region.getRegionNumber(),
-                                          operandAttributes, successorRegions);
+      getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                          successorRegions);
       // Try to find a matching region successor.
       RegionSuccessor *regionSuccessor =
           llvm::find_if(successorRegions, regionPredicate);
@@ -498,7 +601,9 @@
         continue;
       // Get the operand index in the context of the current successor input
       // bindings.
-      auto operandIndex = operandProvider(*regionSuccessor);
+      size_t operandIndex =
+          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
+              .getIndex();
 
       // Iterate over all immediate terminator operations to introduce
       // new buffer allocations. Thereby, the appropriate terminator operand
@@ -518,6 +623,16 @@
   /// its content into the newly allocated buffer. The terminator operation is
   /// used to insert the alloc and copy operations at the right places.
   Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
+    // Avoid multiple copies of the same source value. This can happen in the
+    // presence of loops when a branch acts as a backedge while also having
+    // another successor that returns to its parent operation. Note that
+    // copying copied buffers can introduce memory leaks since the invariant of
+    // BufferPlacement assumes that a buffer will be only copied once into a
+    // temporary buffer. Hence, the construction of copy chains introduces
+    // additional allocations that are not tracked automatically by the
+    // algorithm.
+    if (copiedValues.contains(sourceValue))
+      return sourceValue;
     // Create a new alloc at the current location of the terminator.
     auto memRefType = sourceValue.getType().cast<MemRefType>();
     OpBuilder builder(terminator);
@@ -541,6 +656,8 @@
     // allocation to the new one.
     builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);
+    // Remember the copy of the original source value.
+    copiedValues.insert(alloc);
     return alloc;
   }
 
@@ -652,6 +769,9 @@
   /// Maps allocation nodes to their associated blocks.
   AllocEntryList allocs;
 
+  // Stores already copied allocations to avoid additional copies of copies.
+  ValueSetT copiedValues;
+
   /// The underlying liveness analysis to compute fine grained information
   /// about alloc and dealloc positions.
   Liveness liveness;
@@ -673,6 +793,14 @@
 struct BufferPlacementPass : BufferPlacementBase<BufferPlacementPass> {
 
   void runOnFunction() override {
+    // Ensure that there are only supported loops.
+    Backedges backedges(getFunction());
+    if (backedges.size()) {
+      getFunction().emitError(
+          "Structured control-flow loops are supported only.");
+      return;
+    }
+
     // Place all required alloc, copy and dealloc nodes.
     BufferPlacement placement(getFunction());
     placement.place();
diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir
--- a/mlir/test/Transforms/buffer-placement.mlir
+++ b/mlir/test/Transforms/buffer-placement.mlir
@@ -1125,3 +1125,291 @@
 // CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1)
 // CHECK-NEXT: scf.yield %[[ALLOC0]]
 // CHECK: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: structured control-flow loop using a nested alloc.
+// The alloc position of %3 will not be changed, but the iteration argument
+// %iterBuf has to be freed before yielding %3 to avoid memory leaks.
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = alloc() : memref<2xf32>
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: dealloc %[[ALLOC0]]
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc()
+// CHECK: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]]
+// CHECK: cmpi
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: %[[ALLOC3:.*]] = alloc()
+// CHECK: %[[ALLOC4:.*]] = alloc()
+// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK: dealloc %[[ALLOC3]]
+// CHECK: scf.yield %[[ALLOC4]]
+// CHECK: }
+// CHECK: linalg.copy(%[[ALLOC2]], %arg4)
+// CHECK-NEXT: dealloc %[[ALLOC2]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation.
+// The loop yields buffers that have been defined outside of the loop and the
+// backedges only use the iteration arguments (or one of their aliases).
+// Therefore, we do not have to (and are not allowed to) free any buffers
+// that are passed via the backedges.
+
+// CHECK-LABEL: func @loop_nested_if_no_alloc
+func @loop_nested_if_no_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      scf.yield %0 : memref<2xf32>
+    } else {
+      scf.yield %iterBuf : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
+// CHECK: %[[ALLOC2:.*]] = scf.if
+// CHECK: scf.yield %[[ALLOC0]]
+// CHECK: scf.yield %[[IALLOC]]
+// CHECK: scf.yield %[[ALLOC2]]
+// CHECK: linalg.copy(%[[ALLOC1]], %arg4)
+// CHECK: dealloc %[[ALLOC0]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation using
+// a deeply nested buffer allocation.
+// Since the innermost allocation happens in a divergent branch, we have to
+// introduce additional copies for the nested if operation. Since the loop's
+// yield operation "returns" %3, it will return a newly allocated buffer.
+// Therefore, we have to free the iteration argument %iterBuf before
+// "returning" %3.
+
+// CHECK-LABEL: func @loop_nested_if_alloc
+func @loop_nested_if_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>) -> memref<2xf32> {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      %4 = alloc() : memref<2xf32>
+      scf.yield %4 : memref<2xf32>
+    } else {
+      scf.yield %0 : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  return %1 : memref<2xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK: %[[ALLOC1:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]]
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: %[[ALLOC3:.*]] = scf.if
+
+// CHECK: %[[ALLOC4:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC5:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]])
+// CHECK-NEXT: dealloc %[[ALLOC4]]
+// CHECK-NEXT: scf.yield %[[ALLOC5]]
+
+// CHECK: %[[ALLOC6:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]])
+// CHECK-NEXT: scf.yield %[[ALLOC6]]
+
+// CHECK: %[[ALLOC7:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]])
+// CHECK-NEXT: dealloc %[[ALLOC3]]
+// CHECK-NEXT: scf.yield %[[ALLOC7]]
+
+// CHECK: dealloc %[[ALLOC0]]
+// CHECK-NEXT: return %[[ALLOC2]]
+
+// -----
+
+// Test Case: several nested structured control-flow loops with a deeply nested
+// buffer allocation inside an if operation.
+// Same behavior as in loop_nested_if_alloc: we have to insert deallocations
+// before each yield in all loops recursively.
+ +// CHECK-LABEL: func @loop_nested_alloc +func @loop_nested_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = scf.for %i2 = %lb to %ub step %step + iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { + %3 = scf.for %i3 = %lb to %ub step %step + iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { + %4 = alloc() : memref<2xf32> + %5 = cmpi "eq", %i, %ub : index + %6 = scf.if %5 -> (memref<2xf32>) { + %7 = alloc() : memref<2xf32> + scf.yield %7 : memref<2xf32> + } else { + scf.yield %iterBuf3 : memref<2xf32> + } + scf.yield %6 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + scf.yield %2 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] = %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) +// CHECK-NEXT: dealloc %[[IALLOC0]] +// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] = %[[ALLOC2]]) +// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[IALLOC1]] + +// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] = %[[ALLOC5]]) +// CHECK: %[[ALLOC8:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC8]] +// CHECK: %[[ALLOC9:.*]] = scf.if + +// CHECK: %[[ALLOC11:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC12:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]]) +// CHECK-NEXT: dealloc %[[ALLOC11]] +// CHECK-NEXT: scf.yield %[[ALLOC12]] + +// CHECK: %[[ALLOC13:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]]) +// CHECK-NEXT: scf.yield %[[ALLOC13]] + +// CHECK: dealloc %[[IALLOC2]] +// CHECK-NEXT: %[[ALLOC10:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]]) +// CHECK-NEXT: dealloc %[[ALLOC9]] +// CHECK-NEXT: scf.yield %[[ALLOC10]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC6]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC4]] + +// CHECK: linalg.copy(%[[VAL_7]], %arg4) +// CHECK-NEXT: dealloc %[[VAL_7]] + +// ----- + +// Test Case: explicit control-flow loop with a dynamically allocated buffer. +// The BufferPlacement transformation should fail on this explicit +// control-flow loop since they are not supported. 
+
+// CHECK-LABEL: func @loop_dynalloc
+// expected-error@+1 {{Structured control-flow loops are supported only}}
+func @loop_dynalloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<?xf32>,
+  %arg3: memref<?xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
+
+^loopHeader(%i : i32, %buff : memref<?xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<?xf32>),
+    ^exit(%buff : memref<?xf32>)
+
+^loopBody(%val : i32, %buff2: memref<?xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %size = std.index_cast %inc : i32 to index
+  %alloc1 = alloc(%size) : memref<?xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
+
+^exit(%buff3 : memref<?xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// -----
+
+// Test Case: explicit control-flow loop with a statically allocated buffer.
+// The BufferPlacement transformation should fail on this explicit
+// control-flow loop since such loops are not supported.
+
+// CHECK-LABEL: func @do_loop_alloc
+// expected-error@+1 {{Structured control-flow loops are supported only}}
+func @do_loop_alloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<2xf32>,
+  %arg3: memref<2xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
+
+^loopBody(%val : i32, %buff2: memref<2xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %alloc1 = alloc() : memref<2xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
+
+^loopHeader(%i : i32, %buff : memref<2xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<2xf32>),
+    ^exit(%buff : memref<2xf32>)
+
+^exit(%buff3 : memref<2xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}