diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp
--- a/mlir/lib/Transforms/BufferPlacement.cpp
+++ b/mlir/lib/Transforms/BufferPlacement.cpp
@@ -7,14 +7,15 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements logic for computing correct alloc and dealloc positions.
-// The main class is the BufferPlacementPass class that implements the
-// underlying algorithm. In order to put allocations and deallocations at safe
-// positions, it is significantly important to put them into the correct blocks.
-// However, the liveness analysis does not pay attention to aliases, which can
-// occur due to branches (and their associated block arguments) in general. For
-// this purpose, BufferPlacement firstly finds all possible aliases for a single
-// value (using the BufferPlacementAliasAnalysis class). Consider the following
-// example:
+// Furthermore, buffer placement also adds the required new alloc and copy
+// operations to ensure that all buffers are deallocated. The main class is the
+// BufferPlacementPass class that implements the underlying algorithm. In order
+// to put allocations and deallocations at safe positions, it is important to
+// place them in the correct blocks. However, the liveness analysis does not
+// pay attention to aliases, which can occur due to branches (and their
+// associated block arguments) in general. For this purpose, BufferPlacement
+// first finds all possible aliases for a single value (using the
+// BufferPlacementAliasAnalysis class). Consider the following example:
 //
 // ^bb0(%arg0):
 //   cond_br %cond, ^bb1, ^bb2
@@ -28,16 +29,23 @@
 //
 // Using liveness information on its own would cause us to place the allocs and
 // deallocs in the wrong block. This is due to the fact that %new_value will not
-// be liveOut of its block. Instead, we have to place the alloc for %new_value
-// in bb0 and its associated dealloc in exit. Using the class
-// BufferPlacementAliasAnalysis, we will find out that %new_value has a
-// potential alias %arg1. In order to find the dealloc position we have to find
-// all potential aliases, iterate over their uses and find the common
-// post-dominator block. In this block we can safely be sure that %new_value
-// will die and can use liveness information to determine the exact operation
-// after which we have to insert the dealloc. Finding the alloc position is
-// highly similar and non- obvious. Again, we have to consider all potential
-// aliases and find the common dominator block to place the alloc.
+// be liveOut of its block. Instead, we can place the alloc for %new_value
+// in bb0 and its associated dealloc in exit. Alternatively, the alloc can stay
+// (or even has to stay due to additional dependencies) at its original
+// location, in which case we have to free the buffer in the same block because
+// it cannot be freed in the post-dominator. This, however, requires a new copy
+// buffer for %arg1 that will hold the actual contents. Using the class
+// BufferPlacementAliasAnalysis, we will find out that %new_value has a
+// potential alias %arg1. In order to find the dealloc position we have to find
+// all potential aliases, iterate over their uses and find the common
+// post-dominator block (note that additional copies and buffers remove
+// potential aliases and will influence the placement of the deallocs). In all
+// cases, the computed block can safely be used to free the %new_value buffer
+// (either exit or bb2), since the buffer dies in that block and we can use
+// liveness information to determine the exact operation after which the
+// dealloc has to be inserted. Finding the alloc position is similar and
+// non-obvious. Overall, the algorithm supports moving allocs to other blocks,
+// introducing copy buffers, and placing deallocs in safe positions to ensure
+// that all buffers are freed in the end.
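+//
+// As a rough sketch (simplified; the exact result depends on the pass and the
+// dialect-specific alloc/copy operations), the copy-based variant of the
+// example above could look like this:
+//
+// ^bb2:
+//   %new_value = alloc()       // the alloc stays in bb2
+//   ...
+//   %copy = alloc()            // temporary buffer passed to the successor
+//   copy(%new_value, %copy)
+//   dealloc %new_value         // %new_value is freed in its own block
+//   br ^exit(%copy)
+// ^exit(%arg1):
+//   ...
+//   dealloc %arg1              // the copy buffer is freed in the exit block
+//
+// Note that every predecessor of exit introduces its own copy buffer for the
+// block argument %arg1.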
 //
 // TODO:
 // The current implementation does not support loops and the resulting code will
@@ -49,8 +57,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Transforms/BufferPlacement.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/IR/Operation.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/SetOperations.h"
 
 using namespace mlir;
 
@@ -67,12 +78,21 @@
 class BufferPlacementAliasAnalysis {
 public:
   using ValueSetT = SmallPtrSet<Value, 16>;
+  using ValueMapT = llvm::DenseMap<Value, ValueSetT>;
 
 public:
   /// Constructs a new alias analysis using the op provided.
   BufferPlacementAliasAnalysis(Operation *op) { build(op->getRegions()); }
 
-  /// Finds all immediate and indirect aliases this value could potentially
+  /// Finds all immediate aliases this value could potentially have.
+  ValueMapT::const_iterator find(Value value) const {
+    return aliases.find(value);
+  }
+
+  /// Returns the end iterator that can be used in combination with find.
+  ValueMapT::const_iterator end() const { return aliases.end(); }
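+  // A typical usage pattern (as in the buffer placement analysis below):
+  //   auto it = aliasAnalysis.find(value);
+  //   if (it != aliasAnalysis.end())
+  //     for (Value alias : it->second) { ... }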
+
+  /// Find all immediate and indirect aliases this value could potentially
   /// have. Note that the resulting set will also contain the value provided as
   /// it is an alias of itself.
   ValueSetT resolve(Value value) const {
@@ -81,6 +101,12 @@
     return result;
   }
 
+  /// Removes the given values from all alias sets.
+  void remove(const SmallPtrSetImpl<BlockArgument> &aliasValues) {
+    for (auto &entry : aliases)
+      llvm::set_subtract(entry.second, aliasValues);
+  }
+
 private:
   /// Recursively determines alias information for the given value. It stores
   /// all newly found potential aliases in the given result set.
@@ -127,121 +153,313 @@
   }
 
   /// Maps values to all immediate aliases this value can have.
-  llvm::DenseMap<Value, ValueSetT> aliases;
+  ValueMapT aliases;
 };
 
 //===----------------------------------------------------------------------===//
-// BufferPlacementPositions
+// BufferPlacement
 //===----------------------------------------------------------------------===//
 
-/// Stores correct alloc and dealloc positions to place dialect-specific alloc
-/// and dealloc operations.
-struct BufferPlacementPositions {
+// The main buffer placement analysis used to place allocs, copies and deallocs.
+class BufferPlacement {
 public:
-  BufferPlacementPositions()
-      : allocPosition(nullptr), deallocPosition(nullptr) {}
+  using ValueSetT = BufferPlacementAliasAnalysis::ValueSetT;
+
+  /// An intermediate representation of a single allocation node.
+  struct AllocEntry {
+    /// A reference to the associated allocation node.
+    Value allocValue;
 
-  /// Creates a new positions tuple including alloc and dealloc positions.
-  BufferPlacementPositions(Operation *allocPosition, Operation *deallocPosition)
-      : allocPosition(allocPosition), deallocPosition(deallocPosition) {}
+    /// The associated placement block in which the allocation should be
+    /// performed.
+    Block *placementBlock;
 
-  /// Returns the alloc position before which the alloc operation has to be
-  /// inserted.
-  Operation *getAllocPosition() const { return allocPosition; }
+    /// The associated dealloc operation (if any).
+    Operation *deallocOperation;
+  };
 
-  /// Returns the dealloc position after which the dealloc operation has to be
-  /// inserted.
-  Operation *getDeallocPosition() const { return deallocPosition; }
+  using AllocEntryList = SmallVector<AllocEntry, 8>;
+
+public:
+  BufferPlacement(Operation *op)
+      : operation(op), aliases(op), liveness(op), dominators(op),
+        postDominators(op) {
+    // Gather all allocation nodes.
+    initBlockMapping();
+  }
+
+  /// Performs the actual placement/creation of all alloc, copy and dealloc
+  /// nodes.
+  void place() {
+    // Place all allocations.
+    placeAllocs();
+    // Add additional allocations and copies that are required.
+    introduceCopies();
+    // Find all associated dealloc nodes.
+    findDeallocs();
+    // Place deallocations for all allocation entries.
+    placeDeallocs();
+  }
 
 private:
-  Operation *allocPosition;
-  Operation *deallocPosition;
-};
+  /// Initializes the internal block mapping by discovering allocation nodes. It
+  /// maps all allocation nodes to their initial block in which they can be
+  /// safely allocated.
+  void initBlockMapping() {
+    operation->walk([&](MemoryEffectOpInterface opInterface) {
+      // Try to find a single allocation result.
+      SmallVector<MemoryEffects::EffectInstance, 2> effects;
+      opInterface.getEffects(effects);
 
-//===----------------------------------------------------------------------===//
-// BufferPlacementAnalysis
-//===----------------------------------------------------------------------===//
+      SmallVector<MemoryEffects::EffectInstance, 2> allocateResultEffects;
+      llvm::copy_if(effects, std::back_inserter(allocateResultEffects),
+                    [=](MemoryEffects::EffectInstance &it) {
+                      Value value = it.getValue();
+                      return isa<MemoryEffects::Allocate>(it.getEffect()) &&
+                             value && value.isa<OpResult>();
+                    });
+      // If there is one result only, we will be able to move the allocation and
+      // (possibly existing) deallocation ops.
+      if (allocateResultEffects.size() != 1)
+        return;
+      // Get allocation result.
+      auto allocResult = allocateResultEffects[0].getValue().cast<OpResult>();
+      // Find the initial allocation block and register this result.
+      allocs.push_back(
+          {allocResult, getInitialAllocBlock(allocResult), nullptr});
+    });
+  }
 
-// The main buffer placement analysis used to place allocs and deallocs.
-class BufferPlacementAnalysis {
-public:
-  using DeallocSetT = SmallPtrSet<Operation *, 2>;
+  /// Computes a valid allocation block (in a dominator, if possible) for the
+  /// given allocation result.
+  Block *getInitialAllocBlock(OpResult result) {
+    // Get all operands of the allocation operation, as they constrain where
+    // the allocation can be placed.
+    auto operands = result.getOwner()->getOperands();
+    if (operands.size() < 1)
+      return findCommonDominator(result, aliases.resolve(result), dominators);
+
+    // If this node has dependencies, check all dependent nodes with respect
+    // to a common post dominator in which all values are available.
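+    // Note: the first operand serves as the start value of the dominator
+    // search, while the remaining operands form the dependency set.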
+    ValueSetT dependencies(++operands.begin(), operands.end());
+    return findCommonDominator(*operands.begin(), dependencies, postDominators);
+  }
 
-public:
-  BufferPlacementAnalysis(Operation *op)
-      : operation(op), liveness(op), dominators(op), postDominators(op),
-        aliases(op) {}
-
-  /// Computes the actual positions to place allocs and deallocs for the given
-  /// value.
-  BufferPlacementPositions
-  computeAllocAndDeallocPositions(OpResult result) const {
-    if (result.use_empty())
-      return BufferPlacementPositions(result.getOwner(), result.getOwner());
-    // Get all possible aliases.
-    auto possibleValues = aliases.resolve(result);
-    return BufferPlacementPositions(getAllocPosition(result, possibleValues),
-                                    getDeallocPosition(result, possibleValues));
+  /// Finds correct alloc positions according to the algorithm described at
+  /// the top of the file for all alloc nodes that can be handled by this
+  /// analysis.
+  void placeAllocs() const {
+    for (auto &entry : allocs) {
+      Value alloc = entry.allocValue;
+      // Get the actual block to place the alloc and get liveness information
+      // for the placement block.
+      Block *placementBlock = entry.placementBlock;
+      // We have to ensure that we place the alloc before its first use in this
+      // block.
+      const LivenessBlockInfo *livenessInfo =
+          liveness.getLiveness(placementBlock);
+      Operation *startOperation = livenessInfo->getStartOperation(alloc);
+      // Check whether the start operation lies in the desired placement block.
+      // If not, we will use the terminator as this is the last operation in
+      // this block.
+      if (startOperation->getBlock() != placementBlock)
+        startOperation = placementBlock->getTerminator();
+
+      // Move the alloc in front of the start operation.
+      Operation *allocOperation = alloc.getDefiningOp();
+      allocOperation->moveBefore(startOperation);
+    }
   }
 
-  /// Finds all associated dealloc nodes for the alloc nodes using alias
-  /// information.
-  DeallocSetT findAssociatedDeallocs(OpResult allocResult) const {
-    DeallocSetT result;
-    auto possibleValues = aliases.resolve(allocResult);
-    for (Value alias : possibleValues)
-      for (Operation *op : alias.getUsers()) {
-        // Check for an existing memory effect interface.
-        auto effectInstance = dyn_cast<MemoryEffectOpInterface>(op);
-        if (!effectInstance)
+  /// Introduces required allocs and copy operations to avoid memory leaks.
+  void introduceCopies() {
+    // Initialize the set of block arguments that require a dedicated memory
+    // free operation since the buffers they refer to cannot be safely
+    // deallocated in a common post-dominator.
+    SmallPtrSet<BlockArgument, 8> blockArgsToFree;
+    llvm::SmallDenseSet<std::tuple<BlockArgument, Block *>> visitedBlockArgs;
+    SmallVector<std::tuple<BlockArgument, Block *>, 8> toProcess;
+
+    // Check the dominance relation of a value and its aliases. If the given
+    // value node does not dominate an alias, we will have to create a copy in
+    // order to free all buffers that could potentially leak into a post
+    // dominator.
+    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
+      auto it = aliases.find(source);
+      if (it == aliases.end())
+        return;
+      for (Value value : it->second) {
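+        // Aliases computed by the analysis are always block arguments (values
+        // forwarded via branch terminators), so the cast below is safe.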
+        auto blockArg = value.cast<BlockArgument>();
+        if (blockArgsToFree.count(blockArg) > 0)
           continue;
-        // Check whether the associated value will be freed using the current
-        // operation.
-        SmallVector<MemoryEffects::EffectInstance, 2> effects;
-        effectInstance.getEffectsOnValue(alias, effects);
-        if (llvm::any_of(effects, [=](MemoryEffects::EffectInstance &it) {
-              return isa<MemoryEffects::Free>(it.getEffect());
-            }))
-          result.insert(op);
+        // Check whether we have to free this particular block argument.
+        if (!dominators.dominates(definingBlock, blockArg.getOwner())) {
+          toProcess.emplace_back(blockArg, blockArg.getParentBlock());
+          blockArgsToFree.insert(blockArg);
+        } else if (visitedBlockArgs.insert({blockArg, definingBlock}).second)
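+          // The block argument is dominated by the defining block; queue it to
+          // check its own aliases against the same defining block.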
+          toProcess.emplace_back(blockArg, definingBlock);
       }
-    return result;
+    };
+
+    // Detect possibly unsafe aliases starting from all allocations.
+    for (auto &entry : allocs)
+      findUnsafeValues(entry.allocValue, entry.placementBlock);
+
+    // Try to find block arguments that require an explicit free operation
+    // until we reach a fix point.
+    while (!toProcess.empty()) {
+      auto current = toProcess.pop_back_val();
+      findUnsafeValues(std::get<0>(current), std::get<1>(current));
+    }
+
+    // Update buffer aliases to ensure that we free all buffers and block
+    // arguments at the correct locations.
+    aliases.remove(blockArgsToFree);
+
+    // Add new allocs and additional copy operations.
+    for (BlockArgument blockArg : blockArgsToFree) {
+      Block *block = blockArg.getOwner();
+
+      // Allocate a buffer for the current block argument in the block of
+      // the associated value (which will be a predecessor block by
+      // definition).
+      for (auto it = block->pred_begin(), e = block->pred_end(); it != e;
+           ++it) {
+        // Get the terminator and the value that will be passed to our
+        // argument.
+        Operation *terminator = (*it)->getTerminator();
+        auto successorOperand =
+            cast<BranchOpInterface>(terminator)
+                .getMutableSuccessorOperands(it.getSuccessorIndex())
+                .getValue()
+                .slice(blockArg.getArgNumber(), 1);
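+        // Convert the mutable operand range into a read-only range to access
+        // the value that is currently passed to the block argument.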
+        Value sourceValue = ((OperandRange)successorOperand)[0];
+
+        // Create a new alloc at the current location of the terminator.
+        auto memRefType = sourceValue.getType().cast<MemRefType>();
+        OpBuilder builder(terminator);
+
+        // Extract information about dynamically shaped types by
+        // extracting their dynamic dimensions.
+        SmallVector<Value, 4> dynamicOperands;
+        for (auto shapeElement : llvm::enumerate(memRefType.getShape())) {
+          if (!ShapedType::isDynamic(shapeElement.value()))
+            continue;
+          dynamicOperands.push_back(builder.create<DimOp>(
+              terminator->getLoc(), sourceValue, shapeElement.index()));
+        }
+
+        // TODO: provide a generic interface to create dialect-specific
+        // Alloc and CopyOp nodes.
+        auto alloc = builder.create<AllocOp>(terminator->getLoc(), memRefType,
+                                             dynamicOperands);
+        // Wire new alloc and successor operand.
+        successorOperand.assign(alloc);
+        // Create a new copy operation that copies the contents of the old
+        // buffer to the new one.
+        builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue,
+                                       alloc);
+      }
+
+      // Register the block argument to require a final dealloc. Note that
+      // we do not have to assign a block here since we do not want to
+      // move the allocation node to another location.
+      allocs.push_back({blockArg, nullptr, nullptr});
+    }
   }
 
-  /// Dumps the buffer placement information to the given stream.
-  void print(raw_ostream &os) const {
-    os << "// ---- Buffer Placement -----\n";
-
-    for (Region &region : operation->getRegions())
-      for (Block &block : region)
-        for (Operation &operation : block)
-          for (OpResult result : operation.getResults()) {
-            BufferPlacementPositions positions =
-                computeAllocAndDeallocPositions(result);
-            os << "Positions for ";
-            result.print(os);
-            os << "\n Alloc: ";
-            positions.getAllocPosition()->print(os);
-            os << "\n Dealloc: ";
-            positions.getDeallocPosition()->print(os);
-            os << "\n";
-          }
+  /// Finds associated deallocs that can be linked to our allocation nodes (if
+  /// any).
+  void findDeallocs() {
+    for (auto &entry : allocs) {
+      auto userIt =
+          llvm::find_if(entry.allocValue.getUsers(), [&](Operation *user) {
+            auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
+            if (!effectInterface)
+              return false;
+            // Try to find a free effect that is applied to one of our values
+            // that will be automatically freed by our pass.
+            SmallVector<MemoryEffects::EffectInstance, 2> effects;
+            effectInterface.getEffectsOnValue(entry.allocValue, effects);
+            return llvm::any_of(
+                effects, [&](MemoryEffects::EffectInstance &it) {
+                  return isa<MemoryEffects::Free>(it.getEffect());
+                });
+          });
+      // Assign the associated dealloc operation (if any).
+      if (userIt != entry.allocValue.user_end())
+        entry.deallocOperation = *userIt;
+    }
   }
 
-private:
-  /// Finds a correct placement block to store alloc/dealloc node according to
-  /// the algorithm described at the top of the file. It supports dominator and
+  /// Finds correct dealloc positions according to the algorithm described at
+  /// the top of the file for all alloc nodes and block arguments that can be
+  /// handled by this analysis.
+  void placeDeallocs() const {
+    // Move or insert deallocs using the previously computed information.
+    // These deallocations will be linked to their associated allocation nodes
+    // since the allocations no longer have unsafe aliases that could
+    // (potentially) increase their liveness.
+    for (auto &entry : allocs) {
+      Value alloc = entry.allocValue;
+      auto aliasesSet = aliases.resolve(alloc);
+      assert(aliasesSet.size() > 0 && "must contain at least one alias");
+
+      // Determine the actual block to place the dealloc and get liveness
+      // information.
+      Block *placementBlock =
+          findCommonDominator(alloc, aliasesSet, postDominators);
+      const LivenessBlockInfo *livenessInfo =
+          liveness.getLiveness(placementBlock);
+
+      // We have to ensure that the dealloc will be after the last use of all
+      // aliases of the given value. We first assume that there are no uses in
+      // the placementBlock and that we can safely place the dealloc at the
+      // beginning.
+      Operation *endOperation = &placementBlock->front();
+      // Iterate over all aliases and ensure that the endOperation will point
+      // to the last operation of all potential aliases in the placementBlock.
+      for (Value alias : aliasesSet) {
+        Operation *aliasEndOperation =
+            livenessInfo->getEndOperation(alias, endOperation);
+        // Check whether the aliasEndOperation lies in the desired block and
+        // whether it is behind the current endOperation. If yes, this will be
+        // the new endOperation.
+        if (aliasEndOperation->getBlock() == placementBlock &&
+            endOperation->isBeforeInBlock(aliasEndOperation))
+          endOperation = aliasEndOperation;
+      }
+      // endOperation is the last operation after which we can safely insert
+      // the dealloc, taking all potential aliases into account.
+
+      // If there is an existing dealloc, move it to the right place.
+      if (entry.deallocOperation) {
+        entry.deallocOperation->moveAfter(endOperation);
+      } else {
+        // If the dealloc position is at the terminator of the block, the
+        // buffer is expected to escape (e.g. it is returned) and no dealloc
+        // is inserted.
+        Operation *nextOp = endOperation->getNextNode();
+        if (!nextOp)
+          continue;
+        // If there is no dealloc node, insert one in the right place.
+        OpBuilder builder(nextOp);
+        builder.create<DeallocOp>(alloc.getLoc(), alloc);
+      }
+    }
+  }
+
+  /// Finds a common dominator for the given value while taking the positions
+  /// of the values in the value set into account. It supports dominator and
   /// post-dominator analyses via template arguments.
   template <typename DominatorT>
-  Block *
-  findPlacementBlock(OpResult result,
-                     const BufferPlacementAliasAnalysis::ValueSetT &aliases,
-                     const DominatorT &doms) const {
+  Block *findCommonDominator(Value value, const ValueSetT &values,
+                             const DominatorT &doms) const {
     // Start with the current block the value is defined in.
-    Block *dom = result.getOwner()->getBlock();
+    Block *dom = value.getParentBlock();
     // Iterate over all aliases and their uses to find a safe placement block
     // according to the given dominator information.
-    for (Value alias : aliases)
-      for (Operation *user : alias.getUsers()) {
+    for (Value childValue : values)
+      for (Operation *user : childValue.getUsers()) {
         // Move upwards in the dominator tree to find an appropriate
         // dominator block that takes the current use into account.
         dom = doms.findNearestCommonDominator(dom, user->getBlock());
@@ -249,86 +467,24 @@
     return dom;
   }
 
-  /// Finds a correct alloc position according to the algorithm described at
-  /// the top of the file.
-  Operation *getAllocPosition(
-      OpResult result,
-      const BufferPlacementAliasAnalysis::ValueSetT &aliases) const {
-    // Determine the actual block to place the alloc and get liveness
-    // information.
-    Block *placementBlock = findPlacementBlock(result, aliases, dominators);
-    const LivenessBlockInfo *livenessInfo =
-        liveness.getLiveness(placementBlock);
-
-    // We have to ensure that the alloc will be before the first use of all
-    // aliases of the given value. We first assume that there are no uses in the
-    // placementBlock and that we can safely place the alloc before the
-    // terminator at the end of the block.
-    Operation *startOperation = placementBlock->getTerminator();
-    // Iterate over all aliases and ensure that the startOperation will point to
-    // the first operation of all potential aliases in the placementBlock.
-    for (Value alias : aliases) {
-      Operation *aliasStartOperation = livenessInfo->getStartOperation(alias);
-      // Check whether the aliasStartOperation lies in the desired block and
-      // whether it is before the current startOperation. If yes, this will be
-      // the new startOperation.
-      if (aliasStartOperation->getBlock() == placementBlock &&
-          aliasStartOperation->isBeforeInBlock(startOperation))
-        startOperation = aliasStartOperation;
-    }
-    // startOperation is the first operation before which we can safely store
-    // the alloc taking all potential aliases into account.
-    return startOperation;
-  }
-
-  /// Finds a correct dealloc position according to the algorithm described at
-  /// the top of the file.
-  Operation *getDeallocPosition(
-      OpResult result,
-      const BufferPlacementAliasAnalysis::ValueSetT &aliases) const {
-    // Determine the actual block to place the dealloc and get liveness
-    // information.
-    Block *placementBlock = findPlacementBlock(result, aliases, postDominators);
-    const LivenessBlockInfo *livenessInfo =
-        liveness.getLiveness(placementBlock);
-
-    // We have to ensure that the dealloc will be after the last use of all
-    // aliases of the given value. We first assume that there are no uses in the
-    // placementBlock and that we can safely place the dealloc at the beginning.
-    Operation *endOperation = &placementBlock->front();
-    // Iterate over all aliases and ensure that the endOperation will point to
-    // the last operation of all potential aliases in the placementBlock.
-    for (Value alias : aliases) {
-      Operation *aliasEndOperation =
-          livenessInfo->getEndOperation(alias, endOperation);
-      // Check whether the aliasEndOperation lies in the desired block and
-      // whether it is behind the current endOperation. If yes, this will be the
-      // new endOperation.
-      if (aliasEndOperation->getBlock() == placementBlock &&
-          endOperation->isBeforeInBlock(aliasEndOperation))
-        endOperation = aliasEndOperation;
-    }
-    // endOperation is the last operation behind which we can safely store the
-    // dealloc taking all potential aliases into account.
-    return endOperation;
-  }
-
   /// The operation this transformation was constructed from.
   Operation *operation;
 
-  /// The underlying liveness analysis to compute fine grained information about
-  /// alloc and dealloc positions.
+  /// Alias information that can be updated during the insertion of copies.
+  BufferPlacementAliasAnalysis aliases;
+
+  /// Maps allocation nodes to their associated blocks.
+  AllocEntryList allocs;
+
+  /// The underlying liveness analysis to compute fine grained information
+  /// about alloc and dealloc positions.
   Liveness liveness;
 
-  /// The dominator analysis to place allocs in the appropriate blocks.
+  /// The dominator analysis to place allocs in the appropriate blocks and to
+  /// detect aliases that require copies.
   DominanceInfo dominators;
 
   /// The post dominator analysis to place deallocs in the appropriate blocks.
   PostDominanceInfo postDominators;
-
-  /// The internal alias analysis to ensure that allocs and deallocs take all
-  /// their potential aliases into account.
-  BufferPlacementAliasAnalysis aliases;
 };
 
 //===----------------------------------------------------------------------===//
@@ -336,73 +492,16 @@
 //===----------------------------------------------------------------------===//
 
 /// The actual buffer placement pass that moves alloc and dealloc nodes into
-/// the right positions. It uses the algorithm described at the top of the file.
+/// the right positions and inserts the required copy operations to avoid
+/// memory leaks. It uses the algorithm described at the top of the file.
 struct BufferPlacementPass
     : mlir::PassWrapper<BufferPlacementPass, FunctionPass> {
-  void runOnFunction() override {
-    // Get required analysis information first.
-    auto &analysis = getAnalysis<BufferPlacementAnalysis>();
-
-    // Compute an initial placement of all nodes.
-    llvm::SmallVector<std::pair<OpResult, BufferPlacementPositions>, 16>
-        placements;
-    getFunction().walk([&](MemoryEffectOpInterface op) {
-      // Try to find a single allocation result.
-      SmallVector<MemoryEffects::EffectInstance, 2> effects;
-      op.getEffects(effects);
-
-      SmallVector<MemoryEffects::EffectInstance, 2> allocateResultEffects;
-      llvm::copy_if(effects, std::back_inserter(allocateResultEffects),
-                    [=](MemoryEffects::EffectInstance &it) {
-                      Value value = it.getValue();
-                      return isa<MemoryEffects::Allocate>(it.getEffect()) &&
-                             value && value.isa<OpResult>();
-                    });
-      // If there is one result only, we will be able to move the allocation and
-      // (possibly existing) deallocation ops.
-      if (allocateResultEffects.size() == 1) {
-        // Insert allocation result.
-        auto allocResult = allocateResultEffects[0].getValue().cast<OpResult>();
-        placements.emplace_back(
-            allocResult, analysis.computeAllocAndDeallocPositions(allocResult));
-      }
-    });
 
-    // Move alloc (and dealloc - if any) nodes into the right places and insert
-    // dealloc nodes if necessary.
-    for (auto &entry : placements) {
-      // Find already associated dealloc nodes.
-      OpResult alloc = entry.first;
-      auto deallocs = analysis.findAssociatedDeallocs(alloc);
-      if (deallocs.size() > 1) {
-        emitError(alloc.getLoc(),
-                  "not supported number of associated dealloc operations");
-        return;
-      }
-
-      // Move alloc node to the right place.
-      BufferPlacementPositions &positions = entry.second;
-      Operation *allocOperation = alloc.getOwner();
-      allocOperation->moveBefore(positions.getAllocPosition());
-
-      // If there is an existing dealloc, move it to the right place.
-      Operation *nextOp = positions.getDeallocPosition()->getNextNode();
-      // If the Dealloc position is at the terminator operation of the block,
-      // then the value should escape from a deallocation.
-      if (!nextOp) {
-        assert(deallocs.empty() &&
-               "There should be no dealloc for the returned buffer");
-        continue;
-      }
-      if (deallocs.size()) {
-        (*deallocs.begin())->moveBefore(nextOp);
-      } else {
-        // If there is no dealloc node, insert one in the right place.
-        OpBuilder builder(nextOp);
-        builder.create<DeallocOp>(allocOperation->getLoc(), alloc);
-      }
-    }
-  };
+  void runOnFunction() override {
+    // Place all required alloc, copy and dealloc nodes.
+    BufferPlacement placement(getFunction());
+    placement.place();
+  }
 };
 
 } // end anonymous namespace
diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir
--- a/mlir/test/Transforms/buffer-placement.mlir
+++ b/mlir/test/Transforms/buffer-placement.mlir
@@ -1,7 +1,8 @@
 // RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s
 
-// This file checks the behaviour of BufferPlacement pass for moving Alloc and Dealloc
-// operations and inserting the missing the DeallocOps in their correct positions.
+// This file checks the behaviour of BufferPlacement pass for moving Alloc and
+// Dealloc operations and inserting the missing DeallocOps in their correct
+// positions.
 
 // Test Case:
 //    bb0
@@ -9,8 +10,9 @@
 //  bb1  bb2 <- Initial position of AllocOp
 //   \   /
 //    bb3
-// BufferPlacement Expected Behaviour: It should move the existing AllocOp to the entry block,
-// and insert a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1.
+// BufferPlacement Expected Behaviour: It should move the existing AllocOp to
+// the entry block, and insert a DeallocOp at the exit block after CopyOp since
+// %1 is an alias for %0 and %arg1.
 
 #map0 = affine_map<(d0) -> (d0)>
 
@@ -21,7 +23,11 @@
   br ^bb3(%arg1 : memref<2xf32>)
 ^bb2:
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
@@ -40,8 +46,154 @@
 
 // -----
 
+// Test Case:
+//    bb0
+//   /   \
+//  bb1  bb2 <- Initial position of AllocOp
+//   \   /
+//    bb3
+// BufferPlacement Expected Behaviour: It should not move the existing AllocOp
+// to any other block since the alloc has a dynamic dependency on block
+// argument %0 in bb2. Since the dynamic type is passed to bb3 via the block
+// argument %2, it is currently required to allocate temporary buffers for %2
+// that receive copies of %arg1 and %1 with their appropriate shape dimensions.
+// The copy buffer deallocation will be applied to %2 in block bb3.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @condBranchDynamicType
+func @condBranchDynamicType(
+  %arg0: i1,
+  %arg1: memref<?xf32>,
+  %arg2: memref<?xf32>,
+  %arg3: index) {
+  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+^bb1:
+  br ^bb3(%arg1 : memref<?xf32>)
+^bb2(%0: index):
+  %1 = alloc(%0) : memref<?xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %1 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<?xf32>, memref<?xf32>
+  br ^bb3(%1 : memref<?xf32>)
+^bb3(%2: memref<?xf32>):
+  "linalg.copy"(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: cond_br
+//      CHECK: %[[DIM0:.*]] = dim
+// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]])
+// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]])
+//      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]])
+// CHECK-NEXT: linalg.generic
+//      CHECK: %[[DIM1:.*]] = dim %[[ALLOC1]]
+// CHECK-NEXT: %[[ALLOC2:.*]] = alloc(%[[DIM1]])
+// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]])
+// CHECK-NEXT: dealloc %[[ALLOC1]]
+// CHECK-NEXT: br ^bb3
+// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
+//      CHECK: linalg.copy(%[[ALLOC3]],
+// CHECK-NEXT: dealloc %[[ALLOC3]]
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case:
+//      bb0
+//     /    \
+//   bb1    bb2 <- Initial position of AllocOp
+//    |     /  \
+//    |   bb3  bb4
+//    |     \  /
+//    \     bb5
+//     \    /
+//       bb6
+//        |
+//       bb7
+// BufferPlacement Expected Behaviour: It should not move the existing AllocOp
+// to any other block since the alloc has a dynamic dependency on block
+// argument %0 in bb2. Since the dynamic type is passed to bb5 via the block
+// argument %2 and to bb6 via block argument %3, it is currently required to
+// allocate temporary buffers for %2 and %3 that receive copies of %1 and %arg1
+// with their appropriate shape dimensions. The copy buffer deallocations will
+// be applied to %2 in block bb5 and to %3 in block bb6. Furthermore, there
+// should be no copy inserted for %4.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @condBranchDynamicTypeNested
+func @condBranchDynamicTypeNested(
+  %arg0: i1,
+  %arg1: memref<?xf32>,
+  %arg2: memref<?xf32>,
+  %arg3: index) {
+  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+^bb1:
+  br ^bb6(%arg1 : memref<?xf32>)
+^bb2(%0: index):
+  %1 = alloc(%0) : memref<?xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %1 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<?xf32>, memref<?xf32>
+  cond_br %arg0, ^bb3, ^bb4
+^bb3:
+  br ^bb5(%1 : memref<?xf32>)
+^bb4:
+  br ^bb5(%1 : memref<?xf32>)
+^bb5(%2: memref<?xf32>):
+  br ^bb6(%2 : memref<?xf32>)
+^bb6(%3: memref<?xf32>):
+  br ^bb7(%3 : memref<?xf32>)
+^bb7(%4: memref<?xf32>):
+  "linalg.copy"(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: cond_br
+//      CHECK: ^bb1
+//      CHECK: %[[DIM0:.*]] = dim
+// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]])
+// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]])
+//      CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]])
+// CHECK-NEXT: linalg.generic
+//      CHECK: cond_br
+//      CHECK: ^bb3:
+// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
+//      CHECK: ^bb4:
+// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
+// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
+//      CHECK: %[[DIM2:.*]] = dim %[[ALLOC2]]
+// CHECK-NEXT: %[[ALLOC3:.*]] = alloc(%[[DIM2]])
+// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC3]])
+// CHECK-NEXT: dealloc %[[ALLOC1]]
+// CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}})
+// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
+// CHECK-NEXT: br ^bb7(%[[ALLOC4]]{{.*}})
+// CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}})
+//      CHECK: linalg.copy(%[[ALLOC5]],
+// CHECK-NEXT: dealloc %[[ALLOC4]]
+// CHECK-NEXT: return
+
+// -----
+
 // Test Case: Existing AllocOp with no users.
-// BufferPlacement Expected Behaviour: It should insert a DeallocOp right before ReturnOp.
+// BufferPlacement Expected Behaviour: It should insert a DeallocOp right before
+// ReturnOp.
 
 // CHECK-LABEL: func @emptyUsesValue
 func @emptyUsesValue(%arg0: memref<4xf32>) {
@@ -60,8 +212,9 @@
 //  |    bb1 <- Initial position of AllocOp
 //   \   /
 //    bb2
-// BufferPlacement Expected Behaviour: It should move the existing AllocOp to the entry block
-// and insert a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1.
+// BufferPlacement Expected Behaviour: It should move the existing AllocOp to
+// the entry block and insert a DeallocOp at the exit block after CopyOp since
+// %1 is an alias for %0 and %arg1.
 
 #map0 = affine_map<(d0) -> (d0)>
 
@@ -70,7 +223,11 @@
   cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
 ^bb1:
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
@@ -95,15 +252,20 @@
 //  |    bb1
 //   \   /
 //    bb2
-// BufferPlacement Expected Behaviour: It shouldn't move the alloc position. It only inserts
-// a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1.
+// BufferPlacement Expected Behaviour: It shouldn't move the alloc position. It
+// only inserts a DeallocOp at the exit block after CopyOp since %1 is an alias
+// for %0 and %arg1.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @invCriticalEdge
 func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
@@ -127,28 +289,39 @@
 //  bb1  bb2
 //   \   /
 //    bb3 <- Initial position of the second AllocOp
-// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only inserts two missing DeallocOps in the exit block.
-// %5 is an alias for %0. Therefore, the DeallocOp for %0 should occur after the last GenericOp. The Dealloc for %7 should
-// happen after the CopyOp.
+// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only
+// inserts two missing DeallocOps in the exit block. %5 is an alias for %0.
+// Therefore, the DeallocOp for %0 should occur after the last GenericOp. The
+// Dealloc for %7 should happen after the CopyOp.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @ifElse
 func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
   }: memref<2xf32>, memref<2xf32>
-  cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+  cond_br %arg0,
+    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
   br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
   br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
 ^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
   %7 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %5, %7 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %5, %7 {
   ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
     %tmp2 = exp %gen2_arg0 : f32
     linalg.yield %tmp2 : f32
@@ -162,7 +335,7 @@
 //      CHECK: %[[SECOND_ALLOC:.*]] = alloc()
 // CHECK-NEXT: linalg.generic
 //      CHECK: dealloc %[[FIRST_ALLOC]]
-// CHECK-NEXT: linalg.copy
+//      CHECK: linalg.copy
 // CHECK-NEXT: dealloc %[[SECOND_ALLOC]]
 // CHECK-NEXT: return
 
@@ -174,20 +347,27 @@
 //  bb1  bb2
 //   \   /
 //    bb3
-// BufferPlacement Expected Behaviour: It shouldn't move the AllocOp. It only inserts a missing DeallocOp
-// in the exit block since %5 or %6 are the latest aliases of %0.
+// BufferPlacement Expected Behaviour: It shouldn't move the AllocOp. It only
+// inserts a missing DeallocOp in the exit block since %5 or %6 are the latest
+// aliases of %0.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @ifElseNoUsers
 func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
   }: memref<2xf32>, memref<2xf32>
-  cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+  cond_br %arg0,
+    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
   br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
@@ -197,7 +377,8 @@
   return
 }
 
-//      CHECK: dealloc
+// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
+//      CHECK: dealloc %[[FIRST_ALLOC]]
 // CHECK-NEXT: return
 
 // -----
@@ -219,12 +400,18 @@
 // CHECK-LABEL: func @ifElseNested
 func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
   }: memref<2xf32>, memref<2xf32>
-  cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+  cond_br %arg0,
+    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
 ^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
   br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
 ^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
@@ -235,7 +422,11 @@
   br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
 ^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
   %9 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %7, %9 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %7, %9 {
   ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
     %tmp2 = exp %gen2_arg0 : f32
     linalg.yield %tmp2 : f32
@@ -249,28 +440,36 @@
 //      CHECK: %[[SECOND_ALLOC:.*]] = alloc()
 // CHECK-NEXT: linalg.generic
 //      CHECK: dealloc %[[FIRST_ALLOC]]
-// CHECK-NEXT: linalg.copy
+//      CHECK: linalg.copy
 // CHECK-NEXT: dealloc %[[SECOND_ALLOC]]
 // CHECK-NEXT: return
 
 // -----
 
 // Test Case: Dead operations in a single block.
-// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only inserts the two missing DeallocOps
-// after the last GenericOp.
+// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only
+// inserts the two missing DeallocOps after the last GenericOp.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @redundantOperations
 func @redundantOperations(%arg0: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
   }: memref<2xf32>, memref<2xf32>
   %1 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0, %1 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %0, %1 {
   ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
     %tmp2 = exp %gen2_arg0 : f32
     linalg.yield %tmp2 : f32
@@ -290,22 +489,30 @@
 // -----
 
 // Test Case:
-//                                            bb0
-//                                           /   \
-// Initial position of the first AllocOp -> bb1  bb2 <- Initial position of the second AllocOp
-//                                           \   /
-//                                            bb3
-// BufferPlacement Expected Behaviour: Both AllocOps should be moved to the entry block. Both missing DeallocOps should be moved to
-// the exit block after CopyOp since %arg2 is an alias for %0 and %1.
+//                                     bb0
+//                                    /   \
+// Initial pos of the 1st AllocOp -> bb1  bb2 <- Initial pos of the 2nd AllocOp
+//                                    \   /
+//                                     bb3
+// BufferPlacement Expected Behaviour: Both AllocOps should be moved to the
+// entry block. Both missing DeallocOps should be moved to the exit block after
+// CopyOp since %arg2 is an alias for %0 and %1.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
-func @moving_alloc_and_inserting_missing_dealloc(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){
+func @moving_alloc_and_inserting_missing_dealloc(
+  %cond: i1,
+  %arg0: memref<2xf32>,
+  %arg1: memref<2xf32>) {
   cond_br %cond, ^bb1, ^bb2
 ^bb1:
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
@@ -313,7 +520,11 @@
   br ^exit(%0 : memref<2xf32>)
 ^bb2:
   %1 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %1 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %1 {
   ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
     %tmp2 = exp %gen2_arg0 : f32
     linalg.yield %tmp2 : f32
@@ -333,25 +544,33 @@
 
 // -----
 
-// Test Case: Invalid position of the DeallocOp. There is a user after deallocation.
+// Test Case: Invalid position of the DeallocOp. There is a user after
+// deallocation.
 //   bb0
 //  /   \
 // bb1  bb2 <- Initial position of AllocOp
 //  \   /
 //   bb3
-// BufferPlacement Expected Behaviour: It should move the AllocOp to the entry block.
-// The existing DeallocOp should be moved to exit block.
+// BufferPlacement Expected Behaviour: It should move the AllocOp to the entry
+// block. The existing DeallocOp should be moved to exit block.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @moving_invalid_dealloc_op_complex
-func @moving_invalid_dealloc_op_complex(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){
+func @moving_invalid_dealloc_op_complex(
+  %cond: i1,
+  %arg0: memref<2xf32>,
+  %arg1: memref<2xf32>) {
   cond_br %cond, ^bb1, ^bb2
 ^bb1:
   br ^exit(%arg0 : memref<2xf32>)
 ^bb2:
   %1 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %1 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %1 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
@@ -375,9 +594,15 @@
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @inserting_missing_dealloc_simple
-func @inserting_missing_dealloc_simple(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){
+func @inserting_missing_dealloc_simple(
+  %arg0 : memref<2xf32>,
+  %arg1: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32
@@ -391,14 +616,19 @@
 
 // -----
 
-// Test Case: Moving invalid DeallocOp (there is a user after deallocation) in a single block.
+// Test Case: Moving invalid DeallocOp (there is a user after deallocation) in a
+// single block.
 
 #map0 = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: func @moving_invalid_dealloc_op
-func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){
+func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg0, %0 {
   ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
     %tmp1 = exp %gen1_arg0 : f32
     linalg.yield %tmp1 : f32