Index: llvm/include/llvm/Analysis/LoopAllocationInfo.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Analysis/LoopAllocationInfo.h
@@ -0,0 +1,154 @@
+//===- LoopAllocationInfo.h - memory allocations inside loops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPALLOCATIONINFO_H
+#define LLVM_ANALYSIS_LOOPALLOCATIONINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class BasicBlock;
+class CallInst;
+class DominatorTree;
+class Loop;
+class LoopInfo;
+class TargetLibraryInfo;
+
+/// Track %ptr = allocfn to one or more freefn(%ptr) relationships within the
+/// loop.
+///
+/// This analysis stores information about allocation and deallocation calls
+/// where the pointer is only live within the loop. Its design is tightly
+/// coupled to the internal implementation of LICM, but is exposed in a public
+/// API because the llvm::hoistRegion and llvm::sinkRegion functions in LICM are
+/// also exposed.
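+///
+/// A minimal usage sketch from a LICM-like driver (a sketch only; the
+/// surrounding analyses, flags, and names are whatever the caller already
+/// has):
+///   LoopAllocationInfo LAI;
+///   if (L->hasDedicatedExits() && Preheader)
+///     LAI.analyzeLoop(LI, DT, TLI, L);
+///   sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, CurAST,
+///              MSSAU, &SafetyInfo, &LAI, Flags, ORE);
+///   hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, CurAST,
+///               MSSAU, &SafetyInfo, &LAI, Flags, ORE);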
+///
+/// An allocation and its corresponding deallocations must be both hoisted and
+/// sunk, or neither. LICM performs sinking first, then hoisting. As sinking
+/// and hoisting are performed, more instructions will appear to be loop
+/// invariant, creating more hoisting opportunities. For this reason we split
+/// the analysis into two pieces:
+///   1. for a given pointer that is only live in the loop, retain a list of
+///      its matching deallocation instructions.
+///   2. during sinking, if we would sink a deallocation, don't, but inform
+///      the LoopAllocationInfo. If all deallocations would have been sunk, and
+///      hoistRegion would hoist the allocation, perform both the hoist and
+///      sink the various deallocation instructions.
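+///
+/// The net effect, sketched on a hypothetical single-block loop (block and
+/// value names here are purely illustrative): the malloc is hoisted into the
+/// loop preheader, the in-loop malloc/free pair becomes a pair of
+/// llvm.lifetime.start/llvm.lifetime.end markers, and a free is sunk into the
+/// exit block.
+///   loop:
+///     %ptr = call i8* @malloc(i32 %loop_invariant)  ;; hoisted to preheader
+///     call void @use(i8* %ptr)
+///     call void @free(i8* %ptr)                     ;; sunk into %exit
+///     br i1 %cond, label %loop, label %exit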
+///
+/// There are two subtleties here. First, we don't need guaranteed execution of
+/// any particular deallocation function during unwinding. Before this
+/// transformation, an unwind mid-loop would leak one pointer that was allocated
+/// at the start of the loop. After this transformation, it would leak the one
+/// pointer allocated before the loop began. There is no difference visible to
+/// the caller.
+///
+/// Second, we don't need to identify every deallocation in order for the
+/// transformation to be safe. We need to know that all backedges (technically,
+/// all edges back to the header block, which may be a superset of backedges in
+/// the event of unnatural loop cycles) are only reached by passing through some
+/// deallocation. Those deallocations are guaranteed to be sunk if/when we hoist
+/// the allocation. Other functions may also happen to free the pointer, but we
+/// would have no way of knowing that. If such a function frees the pointer and
+/// we go back to the header block, the code has undefined behaviour due to a
+/// double-free. For it to have well-defined behaviour after freeing, control
+/// must pass to a loop exit block.
+///
+/// Every exit block is categorized as either a block where the pointer is
+/// certainly freed by one of the deallocations we've already identified, or a
+/// block where it is certainly not freed by any of them. In the former case we
+/// will sink a free into that exit block. An opaque function that deallocates
+/// fits into the latter category, and we won't insert a free call in the
+/// block. In the remaining case, where some paths to an exit block free the
+/// pointer and others don't pass through an identified free, we abort the
+/// transform.
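+///
+/// For example, in the following hypothetical sketch (names are illustrative),
+/// %exit falls into the remaining case: it can be reached via %body.free with
+/// %p already freed, and via %body.nofree without passing through any
+/// identified free, so we give up on %p:
+///   loop:
+///     %p = call i8* @malloc(i32 %n)
+///     br i1 %c0, label %body.free, label %body.nofree
+///   body.free:
+///     call void @free(i8* %p)
+///     br i1 %c1, label %loop, label %exit
+///   body.nofree:
+///     br label %exit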
+///
+/// We also have to ensure that the maximum amount of allocated memory is not
+/// raised by this transformation. To ensure this, we stop looking for
+/// allocations after seeing an opaque function call. For example:
+///   loop_top:
+///     %ptr1 = call i8* @malloc(i32 %loop_invariant1)  ;; candidate
+///     %ptr2 = call i8* @malloc(i32 %loop_variant2)    ;; not candidate
+///     %ptr3 = call i8* @malloc(i32 %loop_invariant3)  ;; candidate
+///     call void @opaque()                             ;; stop scanning
+///     %ptr5 = call i8* @malloc(i32 %loop_invariant5)  ;; not candidate
+///
+/// The same idea applies in reverse order to @free calls at the loop exits:
+///   loop_exiting:
+///      call void @free(%ptr1)  ;; not candidate
+///      call void @opaque()     ;; stop scanning
+///      call void @free(%ptr2)  ;; %ptr2 is candidate
+///                              ;; start here and scan upwards
+class LoopAllocationInfo {
+public:
+  void analyzeLoop(LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI,
+                   Loop *CurLoop);
+
+  // LICM has determined that this deallocation call may be sunk.
+  void addSafeToSink(const CallInst *CI) {
+    if (DeallocationToAllocation.count(CI))
+      SafeToSink.insert(CI);
+  }
+
+  // Given an allocation call, return true iff all the safety conditions are
+  // met to hoist this allocation and sink its deallocations.
+  bool mayHoist(const CallInst *CI) const;
+
+  // Retrieve the list of free calls in the loop matching a given allocation.
+  const SmallVector<CallInst *, 16> &FreesForMalloc(CallInst *CI) const {
+    assert(Entries.count(CI));
+    return Entries.find(CI)->second.Deallocations;
+  }
+
+  // Retrieve the list of loop exit blocks that are only entered after the
+  // allocation has been deallocated. All other loop exit blocks are reached
+  // with the allocation still allocated.
+  const SmallVector<BasicBlock *, 16> &
+  ExitBlocksWithPointerFreed(CallInst *CI) const {
+    assert(Entries.count(CI));
+    return Entries.find(CI)->second.ExitBlocksToAddDeallocationsTo;
+  }
+
+  // Returns true if the loop contains no potentially-hoistable allocations.
+  bool empty() const { return Entries.empty(); }
+
+private:
+  struct AllocationInfoEntry {
+    AllocationInfoEntry(
+        SmallVector<CallInst *, 16> &&Deallocations,
+        SmallVector<BasicBlock *, 16> &&ExitBlocksToAddDeallocationsTo)
+        : Deallocations(std::move(Deallocations)),
+          ExitBlocksToAddDeallocationsTo(
+              std::move(ExitBlocksToAddDeallocationsTo)) {}
+
+    // Instructions that deallocate this pointer, but may or may not be
+    // executed.
+    SmallVector<CallInst *, 16> Deallocations;
+
+    // Loop exit blocks which can only be entered with the pointer already
+    // freed.
+    SmallVector<BasicBlock *, 16> ExitBlocksToAddDeallocationsTo;
+  };
+
+  // Store deallocation info for a given allocation. Keyed on the allocation
+  // call.
+  DenseMap<const CallInst *, AllocationInfoEntry> Entries;
+
+  // Look up allocation matching a given deallocation.
+  DenseMap<const CallInst *, const CallInst *> DeallocationToAllocation;
+
+  // Deallocations that LICM has determined are safe to sink. They can be sunk
+  // only if the whole collection of Deallocations in an entry is sunk and its
+  // matching allocation is hoisted.
+  DenseSet<const CallInst *> SafeToSink;
+};
+
+} // namespace llvm
+
+#endif
Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -39,6 +39,7 @@
 class BasicBlock;
 class DataLayout;
 class Loop;
+class LoopAllocationInfo;
 class LoopInfo;
 class MemoryAccess;
 class MemorySSAUpdater;
@@ -119,7 +120,8 @@
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+                LoopAllocationInfo *, SinkAndHoistLICMFlags &,
+                OptimizationRemarkEmitter *);
 
 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
@@ -132,7 +134,8 @@
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
                  MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+                 LoopAllocationInfo *, SinkAndHoistLICMFlags &,
+                 OptimizationRemarkEmitter *);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
Index: llvm/lib/Analysis/CMakeLists.txt
===================================================================
--- llvm/lib/Analysis/CMakeLists.txt
+++ llvm/lib/Analysis/CMakeLists.txt
@@ -50,6 +50,7 @@
   Lint.cpp
   Loads.cpp
   LoopAccessAnalysis.cpp
+  LoopAllocationInfo.cpp
   LoopAnalysisManager.cpp
   LoopUnrollAnalyzer.cpp
   LoopInfo.cpp
Index: llvm/lib/Analysis/LoopAllocationInfo.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Analysis/LoopAllocationInfo.cpp
@@ -0,0 +1,247 @@
+//===- LoopAllocationInfo.cpp - memory allocations inside loops -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAllocationInfo.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+
+/// Track ptr = allocfn to one or more freefn(ptr) relationships within the
+/// loop.
+///
+/// This analysis stores information about allocation and deallocation calls
+/// where the pointer is only live within the loop.
+///
+/// An allocation must be both hoisted and sunk, or neither. LICM performs
+/// sinking first, then hoisting. As sinking and hoisting are performed, more
+/// instructions will appear to be loop invariant, creating more opportunities.
+/// For this reason we split the analysis into two pieces:
+///   1. for a given pointer that is only live in the loop, retain a list of
+///      its matching deallocation instructions.
+///   2. during sinking, if we would sink a deallocation, don't, but inform
+///      the LoopAllocationInfo. If all deallocations would have been sunk, and
+///      hoistRegion would hoist the allocation, perform both the hoist and
+///      sink the various deallocation instructions.
+///
+/// We don't need guaranteed execution of any deallocation function during
+/// unwinding. Before this transformation, an unwind mid-loop would leak one
+/// pointer that was allocated at the start of the loop. After this
+/// transformation, it would leak the one pointer allocated before the loop
+/// began. There is no difference visible to the caller.
+///
+/// We do have to ensure that the maximum amount of allocated memory is not
+/// raised by this transformation. To ensure this, we stop looking for
+/// allocations after seeing an opaque function call. For example:
+///   loop_top:
+///     %ptr1 = call i8* @malloc(i32 %loop_invariant1)  ;; candidate
+///     %ptr2 = call i8* @malloc(i32 %loop_variant2)    ;; not candidate
+///     %ptr3 = call i8* @malloc(i32 %loop_invariant3)  ;; candidate
+///     call void @opaque()                             ;; stop scanning
+///     %ptr5 = call i8* @malloc(i32 %loop_invariant5)  ;; not candidate
+///
+/// The same idea applies in reverse order to @free calls at the loop exits:
+///   loop_exiting:
+///      call void @free(%ptr1)  ;; not candidate
+///      call void @opaque()     ;; stop scanning
+///      call void @free(%ptr2)  ;; %ptr2 is candidate
+///                              ;; start here and scan upwards
+void LoopAllocationInfo::analyzeLoop(LoopInfo *LI, DominatorTree *DT,
+                                     TargetLibraryInfo *TLI, Loop *CurLoop) {
+  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
+         "Unexpected input to LoopAllocationInfo::analyzeLoop.");
+  if (!TLI)
+    return;
+
+  SmallVector<BasicBlock *, 32> LatchBlocks;
+  CurLoop->getLoopLatches(LatchBlocks);
+  if (LatchBlocks.empty())
+    return;
+
+  SmallVector<BasicBlock *, 32> ExitBlocks;
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
+  if (ExitBlocks.empty())
+    return;
+
+  // Unlike hoistRegion, we only scan the header block. If the allocation is in
+  // a later block, it must be behind some sort of branch, which implies that
+  // it's conditional on something (or else another optimization would've
+  // removed the unconditional branch earlier).
+  BasicBlock *HeaderBB = CurLoop->getHeader();
+  SmallPtrSet<BasicBlock *, 1> HeaderBlock;
+  HeaderBlock.insert(HeaderBB);
+  for (BasicBlock::iterator I = HeaderBB->begin(), E = HeaderBB->end(); I != E;
+       ++I) {
+    // Find a run of allocations at the top of the header that are guaranteed
+    // to execute, ignoring the fact that allocation functions are themselves
+    // able to exit the program or unwind.
+
+    // No allocation function is guaranteed to transfer execution to its
+    // successor, so an instruction that is cannot be one of the allocations
+    // we are looking for.
+    if (isGuaranteedToTransferExecutionToSuccessor(&*I)) {
+      // These two intrinsics increase and decrease the heap size,
+      // respectively.
+      // Don't hoist over an allocating intrinsic in order to ensure that any
+      // out-of-memory conditions will occur on the same instruction, in case
+      // there are differences in how the instructions handle OOM. Don't hoist
+      // over a deallocating intrinsic to avoid increasing the maximum heap
+      // allocated.
+      if (match(&*I, PatternMatch::m_Intrinsic<
+                         Intrinsic::objc_autoreleasePoolPush>()) ||
+          match(
+              &*I,
+              PatternMatch::m_Intrinsic<Intrinsic::objc_autoreleasePoolPop>()))
+        break;
+
+      continue;
+    }
+
+    if (!llvm::isAllocationFn(&*I, TLI))
+      break;
+    auto *CI = cast<CallInst>(&*I);
+
+    // Find places in the loop where the pointer is certainly freed. The places
+    // we identify must free this allocation, but there may be any number of
+    // other places that could free our pointer which we miss.
+    //
+    // We use this to ensure that the allocation is freed before we go around
+    // any loop latch.
+    SmallVector<CallInst *, 16> FreeCalls;
+    constexpr size_t UseVisitThreshold = 20;
+    SmallVector<Instruction *, UseVisitThreshold> Worklist;
+    SmallPtrSet<Instruction *, UseVisitThreshold> Visited;
+    auto Enqueue = [&](const Instruction *I) {
+      for (const Use &U : I->uses()) {
+        Instruction *User = cast<Instruction>(U.getUser());
+        if (!CurLoop->contains(User))
+          continue;
+        if (!Visited.insert(User).second)
+          continue;
+        if (Visited.size() == UseVisitThreshold)
+          return;
+        Worklist.push_back(User);
+      }
+    };
+    Enqueue(CI);
+    while (!Worklist.empty() && Visited.size() < UseVisitThreshold) {
+      Instruction *I = Worklist.pop_back_val();
+      if (isa<BitCastInst>(I)) {
+        Enqueue(I);
+      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+        if (GEP->hasAllZeroIndices())
+          Enqueue(GEP);
+      } else if (auto *PN = dyn_cast<PHINode>(I)) {
+        if (PN->hasConstantOrUndefValue())
+          Enqueue(I);
+      } else if (llvm::isFreeCall(I, TLI)) {
+        FreeCalls.push_back(cast<CallInst>(I));
+      }
+    }
+
+    if (FreeCalls.empty())
+      continue;
+
+    SmallPtrSet<BasicBlock *, 32> FreeCallBBs;
+    for (auto FreeCall : FreeCalls)
+      FreeCallBBs.insert(FreeCall->getParent());
+
+    bool DoNotTransform = false;
+
+    // Check that the frees we found cover all the latches.
+    for (BasicBlock *LatchBB : LatchBlocks) {
+      if (FreeCallBBs.count(LatchBB))
+        continue;
+      SmallVector<BasicBlock *, 32> HeaderWorklist;
+      HeaderWorklist.push_back(HeaderBB);
+      if (llvm::isPotentiallyReachableFromMany(HeaderWorklist, LatchBB,
+                                               &FreeCallBBs, DT, LI)) {
+        DoNotTransform = true;
+        break;
+      }
+    }
+    if (DoNotTransform)
+      continue;
+
+    // Determine whether each exit block is reached with the pointer freed or
+    // not. If it can be reached with the pointer conditionally freed, we abort
+    // the transform.
+    SmallVector<BasicBlock *, 16> InsertDeallocation;
+    for (auto ExitBB : ExitBlocks) {
+      SmallVector<BasicBlock *, 32> HeaderWorklist, FreeCallWorklist;
+      HeaderWorklist.push_back(HeaderBB);
+      FreeCallWorklist.insert(FreeCallWorklist.end(), FreeCallBBs.begin(),
+                              FreeCallBBs.end());
+      // Starting at the header, is there a path to ExitBB without passing
+      // through any frees?
+      bool ReachExitBBWithoutFree = llvm::isPotentiallyReachableFromMany(
+          HeaderWorklist, ExitBB, &FreeCallBBs, DT, LI);
+      // Starting from each of the frees, is there a path to ExitBB without
+      // passing back through the header, or is this a single-block loop where
+      // the malloc and free are both in the header?
+      bool ReachExitBBWithFree =
+          FreeCallBBs.count(HeaderBB) ||
+          llvm::isPotentiallyReachableFromMany(FreeCallWorklist, ExitBB,
+                                               &HeaderBlock, DT, LI);
+      assert((ReachExitBBWithoutFree || ReachExitBBWithFree) &&
+             "loop exit blocks not reachable from loop");
+      if (ReachExitBBWithoutFree && ReachExitBBWithFree) {
+        // There exist paths to this exit BB on which the allocation is freed
+        // and paths on which it is not freed.
+        DoNotTransform = true;
+        break;
+      }
+      if (ReachExitBBWithFree) {
+        // Some blocks, such as those with a catchswitch instruction, can't
+        // have a free call inserted in them. Give up transforming the
+        // allocation in that case.
+        if (ExitBB->getFirstInsertionPt() == ExitBB->end()) {
+          DoNotTransform = true;
+          break;
+        }
+        // We'll need to insert a free-call into this exit BB if we hoist the
+        // allocation out of the loop.
+        InsertDeallocation.push_back(ExitBB);
+      }
+    }
+    if (DoNotTransform)
+      continue;
+
+    for (const CallInst *FreeCall : FreeCalls)
+      DeallocationToAllocation[FreeCall] = CI;
+    Entries.try_emplace(CI, std::move(FreeCalls),
+                        std::move(InsertDeallocation));
+  }
+}
+
+// Given an allocation call, return true iff all the safety conditions are met
+// to hoist this allocation and sink its deallocations.
+bool LoopAllocationInfo::mayHoist(const CallInst *CI) const {
+  auto it = Entries.find(CI);
+  if (it == Entries.end())
+    return false;
+  for (const CallInst *FreeCall : it->second.Deallocations) {
+    if (!SafeToSink.count(FreeCall))
+      return false;
+  }
+  return true;
+}
+
+} // namespace llvm
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopAllocationInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -57,6 +58,7 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
@@ -356,6 +358,11 @@
   ICFLoopSafetyInfo SafetyInfo(DT);
   SafetyInfo.computeLoopSafetyInfo(L);
 
+  // Scan for matching malloc/free pairs.
+  LoopAllocationInfo LAI;
+  if (L->hasDedicatedExits() && Preheader)
+    LAI.analyzeLoop(LI, DT, TLI, L);
+
   // We want to visit all of the instructions in this loop... that are not parts
   // of our subloops (they have already had their invariants hoisted out of
   // their loop, into this loop, so there is no need to process the BODIES of
@@ -369,10 +376,12 @@
                                  LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
-                          CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+                          CurAST.get(), MSSAU.get(), &SafetyInfo, &LAI, Flags,
+                          ORE);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+                           CurAST.get(), MSSAU.get(), &SafetyInfo, &LAI, Flags,
+                           ORE);
 
   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
@@ -476,13 +485,13 @@
                       DominatorTree *DT, TargetLibraryInfo *TLI,
                       TargetTransformInfo *TTI, Loop *CurLoop,
                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                      ICFLoopSafetyInfo *SafetyInfo,
+                      ICFLoopSafetyInfo *SafetyInfo, LoopAllocationInfo *LAI,
                       SinkAndHoistLICMFlags &Flags,
                       OptimizationRemarkEmitter *ORE) {
 
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
-         CurLoop != nullptr && SafetyInfo != nullptr &&
+         CurLoop != nullptr && SafetyInfo != nullptr && LAI != nullptr &&
          "Unexpected input to sinkRegion.");
   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
          "Either AliasSetTracker or MemorySSA should be initialized.");
@@ -500,6 +509,8 @@
     if (inSubLoop(BB, CurLoop, LI))
       continue;
 
+    bool ScanForFrees = !LAI->empty() && !isa<CallBase>(BB->getTerminator());
+
     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
       Instruction &I = *--II;
 
@@ -532,6 +543,19 @@
           Changed = true;
         }
       }
+
+      if (ScanForFrees) {
+        if (llvm::isFreeCall(&I, TLI)) {
+          LAI->addSafeToSink(cast<CallInst>(&I));
+        } else if (isa<CallBase>(I) &&
+                   (!isa<IntrinsicInst>(I) ||
+                    match(&I, PatternMatch::m_Intrinsic<
+                                  Intrinsic::objc_autoreleasePoolPush>()))) {
+          // This call might allocate memory. Don't move frees which are before
+          // it to after it.
+          ScanForFrees = false;
+        }
+      }
     }
   }
   if (MSSAU && VerifyMemorySSA)
@@ -776,12 +800,12 @@
 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                        AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                       ICFLoopSafetyInfo *SafetyInfo,
+                       ICFLoopSafetyInfo *SafetyInfo, LoopAllocationInfo *LAI,
                        SinkAndHoistLICMFlags &Flags,
                        OptimizationRemarkEmitter *ORE) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
-         CurLoop != nullptr && SafetyInfo != nullptr &&
+         CurLoop != nullptr && SafetyInfo != nullptr && LAI != nullptr &&
          "Unexpected input to hoistRegion.");
   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
          "Either AliasSetTracker or MemorySSA should be initialized.");
@@ -842,6 +866,50 @@
         continue;
       }
 
+      // If it's a malloc/free pair, try hoisting it out to the preheader. We
+      // don't need to worry about intervening unwinding function calls in this
+      // case, so we can be more aggressive than for generic instructions.
+      if (CurLoop->hasLoopInvariantOperands(&I) &&
+          llvm::isAllocationFn(&I, TLI) && LAI->mayHoist(cast<CallInst>(&I))) {
+        IRBuilder<>(&I).CreateLifetimeStart(&I);
+        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+              MSSAU, ORE);
+        HoistedInstructions.push_back(&I);
+        auto CI = cast<CallInst>(&I);
+
+        // One deallocation will be cloned into all the necessary exit blocks,
+        // then the original deallocations will be deleted.
+        CallInst *Pattern = LAI->FreesForMalloc(CI)[0];
+        Pattern->dropUnknownNonDebugMetadata();
+
+        // The free may be called on a copy of the pointer that is not
+        // available in all exits of the loop, so make it free the allocation
+        // directly.
+        Pattern->setArgOperand(0, CI);
+
+        // Merge debug locations. We don't track which frees have paths to
+        // which exit blocks, so we merge the debug locations of all the frees
+        // and use the result on every free placed in an exit block.
+        for (auto FreeCall : LAI->FreesForMalloc(CI)) {
+          Pattern->applyMergedLocation(Pattern->getDebugLoc(),
+                                       FreeCall->getDebugLoc());
+        }
+
+        // Insert frees into the loop exit blocks where we know the pointer
+        // would have been freed.
+        for (auto ExitBlock : LAI->ExitBlocksWithPointerFreed(CI)) {
+          Pattern->clone()->insertBefore(&*ExitBlock->getFirstInsertionPt());
+        }
+
+        // Replace the original frees with a @lifetime.end marker.
+        for (auto FreeCall : LAI->FreesForMalloc(CI)) {
+          IRBuilder<>(FreeCall).CreateLifetimeEnd(&I);
+          eraseInstruction(*FreeCall, *SafetyInfo, CurAST, MSSAU);
+        }
+
+        Changed = true;
+        continue;
+      }
+
       // Attempt to remove floating point division out of the loop by
       // converting it to a reciprocal multiplication.
       if (I.getOpcode() == Instruction::FDiv &&
@@ -2228,7 +2296,7 @@
   for (BasicBlock *BB : CurLoop->getBlocks())
     for (Instruction &I : *BB) {
       if (N >= LICMN2Theshold) {
-        LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
+        LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for "
                           << *(MemLoc.Ptr) << "\n");
         return true;
       }
Index: llvm/test/Transforms/LICM/allocs.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/allocs.ll
@@ -0,0 +1,750 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -S -enable-mssa-loop-dependency -licm < %s | FileCheck %s
+
+declare i8* @malloc(i32) nounwind
+declare void @free(i8* nocapture) nounwind
+
+declare void @use(i8*)
+declare i1 @expr()
+
+define void @test1(i32 %loop_invariant) {
+; CHECK-LABEL: @test1
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @mightmalloc()
+
+define void @test2(i32 %loop_invariant) {
+; CHECK-LABEL: @test2
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+; CHECK: @mightmalloc
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  call void @mightmalloc()
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+define void @test3(i32 %loop_invariant) {
+; CHECK-LABEL: @test3
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @mightmalloc
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  call void @mightmalloc()
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+define void @test4(i32 %loop_invariant) {
+; CHECK-LABEL: @test4
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @use
+; CHECK: @use
+; CHECK: @use
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr1 = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = call i8* @malloc(i32 %loop_invariant)
+  %ptr3 = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr1)
+  call void @use(i8* %ptr2)
+  call void @use(i8* %ptr3)
+  call void @free(i8* %ptr1)
+  call void @free(i8* %ptr2)
+  call void @free(i8* %ptr3)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+; CHECK: @free
+; CHECK: @free
+  ret void
+}
+
+define void @test5(i32 %loop_invariant) {
+; CHECK-LABEL: @test5
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @use
+; CHECK: @use
+; CHECK: @use
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr3 = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = call i8* @malloc(i32 %loop_invariant)
+  %ptr1 = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr1)
+  call void @use(i8* %ptr2)
+  call void @use(i8* %ptr3)
+  call void @free(i8* %ptr1)
+  call void @free(i8* %ptr2)
+  call void @free(i8* %ptr3)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+; CHECK: @free
+; CHECK: @free
+  ret void
+}
+
+define void @test6(i32 %loop_invariant) {
+; CHECK-LABEL: @test6
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @llvm.lifetime.start
+; CHECK: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK: @use
+; CHECK: @use
+; CHECK: @use
+; CHECK: @llvm.lifetime.end
+; CHECK: @free
+; CHECK: @llvm.lifetime.end
+  %i = phi i32 [%loop_invariant, %header], [%i.1, %loop]
+  %ptr1 = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = call i8* @malloc(i32 %i)
+  %ptr3 = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr1)
+  call void @use(i8* %ptr2)
+  call void @use(i8* %ptr3)
+  call void @free(i8* %ptr1)
+  call void @free(i8* %ptr2)
+  call void @free(i8* %ptr3)
+  %i.1 = add i32 %i, 1
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+; CHECK: @free
+; CHECK-NOT: @free
+; CHECK: ret void
+  ret void
+}
+
+define void @test7(i32 %loop_invariant) {
+; CHECK-LABEL: @test7
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+; CHECK-NOT: @free
+; CHECK-NOT: @llvm.lifetime.end
+  %i = phi i32 [%loop_invariant, %header], [%i.1, %loop.2], [%i.1, %loop.3]
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  %i.1 = add i32 %i, 1
+  %cmp = icmp eq i32 %i.1, 123
+  br i1 %cmp, label %loop.2, label %loop.3
+
+loop.2:
+; CHECK-LABEL: loop.2:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %continue.1 = call i1 @expr()
+  call void @free(i8* %ptr)
+  br i1 %continue.1, label %loop, label %loopexit
+
+loop.3:
+; CHECK-LABEL: loop.3:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %continue.2 = call i1 @expr()
+  call void @free(i8* %ptr)
+  br i1 %continue.2, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @assert_fail()
+
+define void @test8(i32 %loop_invariant) {
+; CHECK-LABEL: @test8
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  %fail = call i1 @expr()
+  br i1 %fail, label %assert_fail, label %assert_pass
+
+assert_fail:
+  call void @assert_fail()
+  unreachable
+
+assert_pass:
+; CHECK-LABEL: assert_pass:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @usei64(i64)
+
+define void @test9(i32 %loop_invariant) {
+; CHECK-LABEL: @test9
+header:
+; CHECK-LABEL: header:
+; CHECK: add
+; CHECK: @malloc
+; CHECK: ptrtoint
+; CHECK: and
+; CHECK-NOT: usei64
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: usei64
+  %size = add i32 %loop_invariant, 1
+  %ptr = call i8* @malloc(i32 %size)
+  %ptrbits = ptrtoint i8* %ptr to i64
+  %aligncheck = and i64 %ptrbits, 4
+  call void @usei64(i64%aligncheck)
+  %fail = call i1 @expr()
+  br i1 %fail, label %assert_fail, label %assert_pass
+
+assert_fail:
+  call void @assert_fail()
+  unreachable
+
+assert_pass:
+; CHECK-LABEL: assert_pass:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+define void @test10(i32 %loop_invariant) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test10
+header:
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  invoke void @_CxxThrowException(i8* null, i8* null)
+          to label %continue unwind label %catch.dispatch
+
+continue:
+  %take.backedge = call i1 @expr()
+  br i1 %take.backedge, label %loop, label %loopexit
+
+catch.dispatch:
+  %cs = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+  catchret from %cp to label %loopexit
+
+loopexit:
+  ret void
+}
+
+define void @test11(i32 %loop_invariant) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test11
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  invoke void @_CxxThrowException(i8* null, i8* null)
+          to label %continue unwind label %catch.dispatch
+
+continue:
+; CHECK-LABEL: continue:
+; CHECK-NOT: free
+; CHECK: lifetime.end
+; CHECK-NOT: free
+  %take.backedge = call i1 @expr()
+  call void @free(i8* %ptr)
+  br i1 %take.backedge, label %loop, label %loopexit
+
+catch.dispatch:
+; CHECK-LABEL: catch.dispatch:
+; CHECK-NEXT: catchswitch
+  %cs = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+  catchret from %cp to label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare float @llvm.log.f32(float %Val)
+
+define void @test12(i32 %loop_invariant) {
+; CHECK-LABEL: @test12
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  call float @llvm.log.f32(float 0.0)
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call float @llvm.log.f32(float 0.0)
+  call void @free(i8* %ptr)
+  call float @llvm.log.f32(float 0.0)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @llvm.objc.autoreleasePoolPop(i8*)
+declare i8* @llvm.objc.autoreleasePoolPush()
+
+define void @test13(i32 %loop_invariant) {
+; CHECK-LABEL: @test13
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  %managed_ptr = call i8* @llvm.objc.autoreleasePoolPush()
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+define void @test14(i32 %loop_invariant) {
+; CHECK-LABEL: @test14
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  %managed_ptr = call i8* @llvm.objc.autoreleasePoolPush()
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+declare i8 @expr8()
+
+define void @test15(i32 %loop_invariant) {
+; CHECK-LABEL: @test15
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  %Val = call i8 @expr8()
+  switch i8 %Val, label %b1 [ i8 2, label %b2
+                              i8 3, label %b3 ]
+
+b1:
+  call void @usei64(i64 1)
+  %a = call i1 @expr()
+  call void @free(i8* %ptr), !dbg !6
+  br i1 %a, label %loop, label %loopexit
+
+b2:
+  call void @usei64(i64 2)
+  %b = call i1 @expr()
+  call void @free(i8* %ptr), !dbg !7
+  br i1 %b, label %loop, label %loopexit
+
+b3:
+  call void @usei64(i64 3)
+  %c = call i1 @expr()
+  call void @free(i8* %ptr), !dbg !8
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free{{.*}}, !dbg [[test15MergedLoc:![0-9]+]]
+  ret void
+}
+
+define void @test16(i32 %loop_invariant, i8* %vptr) {
+; CHECK-LABEL: @test16
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  %Val = load volatile i8, i8* %vptr
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+define void @test17(i32 %loop_invariant) {
+; CHECK-LABEL: @test17
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = bitcast i8* %ptr to i8*
+  %ptr3 = getelementptr i8, i8* %ptr2, i32 0
+  br label %nextloopblock
+
+nextloopblock:
+  %ptr4 = phi i8* [%ptr3, %loop]
+  call void @free(i8* %ptr4)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+define void @test18(i32 %loop_invariant) {
+; CHECK-LABEL: @test18
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  %continue = call i1 @expr()
+  %branch = call i1 @expr()
+  br i1 %branch, label %nextbb1, label %nextbb2
+
+nextbb1:
+  %ptr2 = bitcast i8* %ptr to i8*
+  call void @free(i8* %ptr2)
+  br label %nextloopblock
+
+nextbb2:
+  call void @free(i8* %ptr)
+  br label %nextloopblock
+
+nextloopblock:
+  br i1 %continue, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: undef
+; CHECK: @free
+  ret void
+}
+
+declare void @might_malloc_or_throw()
+
+define void @test19(i32 %loop_invariant) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test19
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+; CHECK: @might_malloc_or_throw
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  invoke void @might_malloc_or_throw() to label %loop unwind label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  %lpad = landingpad { i8*, i32 } catch i8* null
+  ret void
+}
+
+define void @test20(i32 %loop_invariant) {
+; CHECK-LABEL: @test20
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop.header
+  br label %loop.header
+
+loop.header:
+; CHECK-LABEL: loop.header:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop.header, label %loop2
+
+loop2:
+  br i1 undef, label %loop.header, label %loop.exit
+
+loop.exit:
+; CHECK-LABEL: loop.exit:
+; CHECK: @free
+  ret void
+}
+
+define void @test21(i32 %loop_invariant) {
+; CHECK-LABEL: @test21
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop.header
+  br label %loop.header
+
+loop.header:
+; CHECK-LABEL: loop.header:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop.header, label %loop2
+
+loop2:
+; CHECK-LABEL: loop2:
+; CHECK: call i1 @expr
+  ; This call, which runs after @free, may allocate memory, so the free must
+  ; not be sunk past it.
+  %A = call i1 @expr()
+  br i1 %A, label %loop.header, label %loop.exit
+
+loop.exit:
+; CHECK-LABEL: loop.exit:
+; CHECK-NOT: @free
+  ret void
+}
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!3}
+
+; CHECK: [[test15MergedLoc]] = !DILocation(line: 0
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{i32 1, !"PIC Level", i32 2}
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4)
+!4 = !DIFile(filename: "allocs.ll", directory: "test/Transforms/LICM")
+!5 = distinct !DISubprogram(name: "test/Transforms/LICM/allocs.ll", unit: !3)
+!6 = !DILocation(line: 1, column: 10, scope: !5)
+!7 = !DILocation(line: 2, column: 20, scope: !5)
+!8 = !DILocation(line: 3, column: 30, scope: !5)