Index: llvm/include/llvm/Analysis/LoopAllocationInfo.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Analysis/LoopAllocationInfo.h
@@ -0,0 +1,104 @@
+//===- LoopAllocationInfo.h - memory allocations inside loops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPALLOCATIONINFO_H
+#define LLVM_ANALYSIS_LOOPALLOCATIONINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class BasicBlock;
+class CallInst;
+class DominatorTree;
+class Loop;
+class LoopInfo;
+class TargetLibraryInfo;
+
+/// Track ptr = allocfn to one or more freefn(ptr) relationships within the
+/// loop.
+///
+/// This analysis stores information about allocation and deallocation calls
+/// where the pointer is only live within the loop. Its design is tighly coupled
+/// to the internal implementation of LICM, but is exposed in a public API
+/// because the hoistRegion and sinkRegion functions in LICM are also exposed.
+///
+/// An allocation must be both hoisted and sunk, or neither. LICM performs
+/// sinking first, then hoisting. As sinking and hoisting is performed, more
+/// instructions will appear to be loop invariant, making more opportunities.
+/// For this reason we split the analysis into two pieces:
+///   1. for a given pointer that is only live in the loop, retain a list its
+///      matching deallocating instructions.
+///   2. during sinking, if we would sink the deallocation, don't, but inform
+///      the LoopAllocationInfo. If all deallocations would have been sunk, and
+///      hoistSink would hoist the allocation, perform both the hoist and sink
+///      the various deallocation instructions.
+///
+/// We don't need guaranteed execution of any deallocation function during
+/// unwinding. Before this transformation, an unwind mid-loop would leak one
+/// pointer that was allocated at the start of the loop. After this
+/// transformation, it would leak the one pointer allocated before the loop
+/// began. There is no difference visible to the caller.
+///
+/// We do have to ensure that the maximum amount of allocated memory is not
+/// raised by this transformation. To ensure this, we stop looking for
+/// allocations after seeing an opaque function call. For example:
+///   loop_top:
+///     %ptr1 = call i8* @malloc(i32 %loop_invariant1)  ;; candidate
+///     %ptr2 = call i8* @malloc(i32 %loop_variant2)    ;; not candidate
+///     %ptr3 = call i8* @malloc(i32 %loop_invariant3)  ;; candidate
+///     call void @opaque()                             ;; stop scanning
+///     %ptr5 = call i8* @malloc(i32 %loop_invariant5)  ;; not candidate
+///
+/// The same idea applies in reverse order to @free calls at the loop exits:
+///   loop_exiting:
+///      call void @free(%ptr1)  ;; not candidate
+///      call void @opaque()     ;; stop scanning
+///      call void @free(%ptr2)  ;; %ptr2 is candidate
+///                              ;; start here and scan upwards
+class LoopAllocationInfo {
+public:
+  void analyzeLoop(LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI,
+                   Loop *CurLoop);
+
+  // sinkRegion has determined that this deallocation call may be sunk.
+  void addSafeToSink(const CallInst *CI);
+
+  // hoistRegion would like to know whether the given allocation call may be
+  // hoisted.
+  bool mayHoist(const CallInst *CI) const;
+
+  // Retrieve the list of free calls that should be removed from the loop.
+  const SmallVector<CallInst *, 16> &toSink(const CallInst *CI);
+
+  // Retrieve the list of loop exit blocks which must have a free inserted.
+  // This is a subset or equal to the loops exit blocks.
+  const SmallVector<BasicBlock *, 16> &addDeallocations(const CallInst *CI);
+
+  // Returns whether the loop has any potentially-hoistable allocations.
+  bool empty() const { return Allocations.empty(); }
+
+private:
+  // The n-th index into these vectors finds the matching allocation and
+  // deallocations.
+  SmallVector<const CallInst *, 4> Allocations;
+  SmallVector<SmallVector<CallInst *, 16>, 4> Deallocations;
+  SmallVector<SmallVector<BasicBlock *, 16>, 4> ExitBlocksToAddDeallocationsTo;
+
+  // Look up the index into the above vector given a call.
+  DenseMap<const CallInst *, size_t> AllocationLookup;
+  DenseMap<const CallInst *, size_t> DeallocationLookup;
+
+  DenseSet<const CallInst *> SafeToSink;
+};
+
+} // namespace llvm
+
+#endif
Index: llvm/include/llvm/IR/Instruction.h
===================================================================
--- llvm/include/llvm/IR/Instruction.h
+++ llvm/include/llvm/IR/Instruction.h
@@ -536,6 +536,12 @@
     return mayReadFromMemory() || mayWriteToMemory();
   }
 
+  /// Return true if this instruction may allocate heap memory.
+  bool mayAllocateMemory() const;
+
+  /// Return true if this instruction may deallocate heap memory.
+  bool mayDeallocateMemory() const;
+
   /// Return true if this instruction has an AtomicOrdering of unordered or
   /// higher.
   bool isAtomic() const;
Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -39,6 +39,7 @@
 class BasicBlock;
 class DataLayout;
 class Loop;
+class LoopAllocationInfo;
 class LoopInfo;
 class MemoryAccess;
 class MemorySSAUpdater;
@@ -112,7 +113,7 @@
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                bool, int &, OptimizationRemarkEmitter *);
+                LoopAllocationInfo *, bool, int &, OptimizationRemarkEmitter *);
 
 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
@@ -124,8 +125,8 @@
 /// ORE. It returns changed status.
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
-                 MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &,
-                 OptimizationRemarkEmitter *);
+                 MemorySSAUpdater *, ICFLoopSafetyInfo *, LoopAllocationInfo *,
+                 bool, int &, OptimizationRemarkEmitter *);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
Index: llvm/lib/Analysis/CMakeLists.txt
===================================================================
--- llvm/lib/Analysis/CMakeLists.txt
+++ llvm/lib/Analysis/CMakeLists.txt
@@ -50,6 +50,7 @@
   Lint.cpp
   Loads.cpp
   LoopAccessAnalysis.cpp
+  LoopAllocationInfo.cpp
   LoopAnalysisManager.cpp
   LoopUnrollAnalyzer.cpp
   LoopInfo.cpp
Index: llvm/lib/Analysis/LoopAllocationInfo.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Analysis/LoopAllocationInfo.cpp
@@ -0,0 +1,235 @@
+//===- LoopAllocationInfo.cpp - memory allocations inside loops -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAllocationInfo.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+
+/// Track ptr = allocfn to one or more freefn(ptr) relationships within the
+/// loop.
+///
+/// This analysis stores information about allocation and deallocation calls
+/// where the pointer is only live within the loop.
+///
+/// An allocation must be both hoisted and sunk, or neither. LICM performs
+/// sinking first, then hoisting. As sinking and hoisting is performed, more
+/// instructions will appear to be loop invariant, making more opportunities.
+/// For this reason we split the analysis into two pieces:
+///   1. for a given pointer that is only live in the loop, retain a list its
+///      matching deallocating instructions.
+///   2. during sinking, if we would sink the deallocation, don't, but inform
+///      the LoopAllocationInfo. If all deallocations would have been sunk, and
+///      hoistSink would hoist the allocation, perform both the hoist and sink
+///      the various deallocation instructions.
+///
+/// We don't need guaranteed execution of any deallocation function during
+/// unwinding. Before this transformation, an unwind mid-loop would leak one
+/// pointer that was allocated at the start of the loop. After this
+/// transformation, it would leak the one pointer allocated before the loop
+/// began. There is no difference visible to the caller.
+///
+/// We do have to ensure that the maximum amount of allocated memory is not
+/// raised by this transformation. To ensure this, we stop looking for
+/// allocations after seeing an opaque function call. For example:
+///   loop_top:
+///     %ptr1 = call i8* @malloc(i32 %loop_invariant1)  ;; candidate
+///     %ptr2 = call i8* @malloc(i32 %loop_variant2)    ;; not candidate
+///     %ptr3 = call i8* @malloc(i32 %loop_invariant3)  ;; candidate
+///     call void @opaque()                             ;; stop scanning
+///     %ptr5 = call i8* @malloc(i32 %loop_invariant5)  ;; not candidate
+///
+/// The same idea applies in reverse order to @free calls at the loop exits:
+///   loop_exiting:
+///      call void @free(%ptr1)  ;; not candidate
+///      call void @opaque()     ;; stop scanning
+///      call void @free(%ptr2)  ;; %ptr2 is candidate
+///                              ;; start here and scan upwards
+void LoopAllocationInfo::analyzeLoop(LoopInfo *LI, DominatorTree *DT,
+                                     TargetLibraryInfo *TLI, Loop *CurLoop) {
+  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
+         "Unexpected input to LoopAllocationInfo::analyzeLoop.");
+  if (!TLI)
+    return;
+
+  SmallVector<BasicBlock *, 32> LatchBlocks;
+  CurLoop->getLoopLatches(LatchBlocks);
+  if (LatchBlocks.empty())
+    return;
+
+  SmallVector<BasicBlock *, 32> ExitBlocks;
+  CurLoop->getExitBlocks(ExitBlocks);
+  if (ExitBlocks.empty())
+    return;
+
+  // Unlike hoistRegion, we only scan the header block. If the allocation is in
+  // a later block, it must be behind some sort of branch which implies that
+  // it's conditional on something or else another optimization would've removed
+  // the unconditional branch earlier).
+  BasicBlock *HeaderBB = CurLoop->getHeader();
+  SmallPtrSet<BasicBlock *, 1> HeaderBlock;
+  HeaderBlock.insert(HeaderBB);
+  for (BasicBlock::iterator II = HeaderBB->begin(), E = HeaderBB->end();
+       II != E; ++II) {
+    if (!II->mayDeallocateMemory() && !II->mayAllocateMemory())
+      continue;
+    if (!llvm::isAllocationFn(&*II, TLI))
+      break;
+    auto CI = cast<CallInst>(II);
+
+    // Find places in the loop where the pointer is certainly freed. The places
+    // we identify must free this allocation, but there may be any number of
+    // other places that could free our pointer which we miss.
+    //
+    // We use this to ensure that the allocation is freed before we go around
+    // any loop latch.
+    SmallVector<CallInst *, 16> FreeCalls;
+    constexpr size_t UseVisitThreshold = 20;
+    SmallVector<Instruction *, UseVisitThreshold> Worklist;
+    SmallPtrSet<Instruction *, UseVisitThreshold> Visited;
+    auto Enqueue = [&](const Instruction *I) {
+      for (const Use &U : I->uses()) {
+        Instruction *User = cast<Instruction>(U.getUser());
+        if (!CurLoop->contains(User))
+          continue;
+        if (!Visited.insert(User).second)
+          continue;
+        if (Visited.size() == UseVisitThreshold)
+          return;
+        Worklist.push_back(User);
+      }
+    };
+    Enqueue(CI);
+    do {
+      Instruction *I = Worklist.pop_back_val();
+      if (isa<BitCastInst>(I)) {
+        Enqueue(I);
+      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+        if (GEP->hasAllZeroIndices())
+          Enqueue(GEP);
+      } else if (auto *PN = dyn_cast<PHINode>(I)) {
+        if (PN->hasConstantOrUndefValue())
+          Enqueue(I);
+      } else if (llvm::isFreeCall(I, TLI)) {
+        FreeCalls.push_back(cast<CallInst>(I));
+      }
+    } while (!Worklist.empty() && Visited.size() < UseVisitThreshold);
+
+    if (FreeCalls.empty() || Visited.size() >= UseVisitThreshold)
+      continue;
+
+    SmallPtrSet<BasicBlock *, 32> FreeCallBBs;
+    for (auto FreeCall : FreeCalls)
+      FreeCallBBs.insert(FreeCall->getParent());
+
+    bool DoNotTransform = false;
+
+    // Check that the frees we found cover all the latches.
+    for (BasicBlock *LatchBB : LatchBlocks) {
+      if (FreeCallBBs.count(LatchBB))
+        continue;
+      SmallVector<BasicBlock *, 32> HeaderWorklist;
+      HeaderWorklist.push_back(HeaderBB);
+      if (llvm::isPotentiallyReachableFromMany(HeaderWorklist, LatchBB,
+                                               &FreeCallBBs, DT, LI)) {
+        DoNotTransform = true;
+        break;
+      }
+    }
+    if (DoNotTransform)
+      continue;
+
+    // Examine each exit block and determine whether was a free which is
+    // unconditionally executed on the way to the exit block.
+    //
+    // We use two queries, is there a path from any free to this exit without
+    // going back to the header (if not, do not insert a free), and is there a
+    // path from the path from the header to this exit without going through any
+    // frees (if not, we must insert a free). Both of these queries can return
+    // true, in which case the deallocation is conditional or there are other
+    // deallocations we can not see, and we do not perform the transform.
+    SmallVector<BasicBlock *, 16> InsertDeallocation;
+    for (auto ExitBB : ExitBlocks) {
+      SmallVector<BasicBlock *, 32> HeaderWorklist, FreeCallWorklist;
+      HeaderWorklist.push_back(HeaderBB);
+      FreeCallWorklist.insert(FreeCallWorklist.end(), FreeCallBBs.begin(),
+                              FreeCallBBs.end());
+      bool A = llvm::isPotentiallyReachableFromMany(HeaderWorklist, ExitBB,
+                                                    &FreeCallBBs, DT, LI);
+      bool B = FreeCallBBs.count(HeaderBB) ||
+               llvm::isPotentiallyReachableFromMany(FreeCallWorklist, ExitBB,
+                                                    &HeaderBlock, DT, LI);
+      assert((A || B) && "loop exit blocks not reachable from loop");
+      if (A && B) {
+        // We can't tell whether we pass through a free on the way to this exit
+        // block.
+        DoNotTransform = true;
+        break;
+      }
+      if (B)
+        InsertDeallocation.push_back(ExitBB);
+    }
+    if (DoNotTransform)
+      continue;
+
+    AllocationLookup[CI] = Allocations.size();
+    for (const CallInst *FreeCall : FreeCalls)
+      DeallocationLookup[FreeCall] = Allocations.size();
+    Allocations.push_back(CI);
+    Deallocations.emplace_back(std::move(FreeCalls));
+    ExitBlocksToAddDeallocationsTo.emplace_back(std::move(InsertDeallocation));
+  }
+}
+
+// sinkRegion has determined that this deallocation call may be sunk.
+void LoopAllocationInfo::addSafeToSink(const CallInst *CI) {
+  if (DeallocationLookup.count(CI))
+    SafeToSink.insert(CI);
+}
+
+// hoistRegion would like to know whether the given allocation call may be
+// hoisted.
+bool LoopAllocationInfo::mayHoist(const CallInst *CI) const {
+  auto it = AllocationLookup.find(CI);
+  if (it == AllocationLookup.end())
+    return false;
+  size_t index = it->second;
+  assert(index < Deallocations.size());
+  for (const CallInst *FreeCall : Deallocations[index]) {
+    if (!SafeToSink.count(FreeCall))
+      return false;
+  }
+  return true;
+}
+
+const SmallVector<CallInst *, 16> &
+LoopAllocationInfo::toSink(const CallInst *CI) {
+  assert(AllocationLookup.find(CI) != AllocationLookup.end());
+  return Deallocations[AllocationLookup[CI]];
+}
+
+const SmallVector<BasicBlock *, 16> &
+LoopAllocationInfo::addDeallocations(const CallInst *CI) {
+  assert(AllocationLookup.find(CI) != AllocationLookup.end());
+  return ExitBlocksToAddDeallocationsTo[AllocationLookup[CI]];
+}
+
+} // namespace llvm
Index: llvm/lib/IR/Instruction.cpp
===================================================================
--- llvm/lib/IR/Instruction.cpp
+++ llvm/lib/IR/Instruction.cpp
@@ -548,6 +548,46 @@
   }
 }
 
+bool Instruction::mayAllocateMemory() const {
+  switch (getOpcode()) {
+  default:
+    return false;
+  case Instruction::CallBr:
+    return true;
+  case Instruction::Call: {
+    auto II = dyn_cast<IntrinsicInst>(this);
+    if (!II)
+      return true;
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::objc_autoreleasePoolPush:
+      return true;
+    }
+  }
+  }
+}
+
+bool Instruction::mayDeallocateMemory() const {
+  switch (getOpcode()) {
+  default:
+    return false;
+  case Instruction::CallBr:
+    return true;
+  case Instruction::Call: {
+    auto II = dyn_cast<IntrinsicInst>(this);
+    if (!II)
+      return true;
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::objc_autoreleasePoolPop:
+      return true;
+    }
+  }
+  }
+}
+
 bool Instruction::isAtomic() const {
   switch (getOpcode()) {
   default:
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopAllocationInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -56,6 +57,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
@@ -342,6 +344,11 @@
   ICFLoopSafetyInfo SafetyInfo(DT);
   SafetyInfo.computeLoopSafetyInfo(L);
 
+  // Scan for matching malloc/free pairs.
+  LoopAllocationInfo LAI;
+  if (L->hasDedicatedExits() && Preheader)
+    LAI.analyzeLoop(LI, DT, TLI, L);
+
   // We want to visit all of the instructions in this loop... that are not parts
   // of our subloops (they have already had their invariants hoisted out of
   // their loop, into this loop, so there is no need to process the BODIES of
@@ -354,11 +361,11 @@
   //
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
-                          CurAST.get(), MSSAU.get(), &SafetyInfo,
+                          CurAST.get(), MSSAU.get(), &SafetyInfo, &LAI,
                           NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST.get(), MSSAU.get(), &SafetyInfo,
+                           CurAST.get(), MSSAU.get(), &SafetyInfo, &LAI,
                            NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
 
   // Now that all loop invariants have been removed from the loop, promote any
@@ -463,12 +470,13 @@
                       DominatorTree *DT, TargetLibraryInfo *TLI,
                       TargetTransformInfo *TTI, Loop *CurLoop,
                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                      ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
-                      int &LicmMssaOptCounter, OptimizationRemarkEmitter *ORE) {
+                      ICFLoopSafetyInfo *SafetyInfo, LoopAllocationInfo *LAI,
+                      bool NoOfMemAccTooLarge, int &LicmMssaOptCounter,
+                      OptimizationRemarkEmitter *ORE) {
 
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
-         CurLoop != nullptr && SafetyInfo != nullptr &&
+         CurLoop != nullptr && SafetyInfo != nullptr && LAI != nullptr &&
          "Unexpected input to sinkRegion.");
   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
          "Either AliasSetTracker or MemorySSA should be initialized.");
@@ -486,6 +494,7 @@
     if (inSubLoop(BB, CurLoop, LI))
       continue;
 
+    bool PassedOpaqueFunction = LAI->empty();
     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
       Instruction &I = *--II;
 
@@ -518,6 +527,14 @@
           Changed = true;
         }
       }
+
+      if (!PassedOpaqueFunction) {
+        if (llvm::isFreeCall(&I, TLI)) {
+          LAI->addSafeToSink(cast<CallInst>(&I));
+        } else if (I.mayAllocateMemory()) {
+          PassedOpaqueFunction = true;
+        }
+      }
     }
   }
   if (MSSAU && VerifyMemorySSA)
@@ -762,12 +779,12 @@
 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                        AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                       ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
-                       int &LicmMssaOptCounter,
+                       ICFLoopSafetyInfo *SafetyInfo, LoopAllocationInfo *LAI,
+                       bool NoOfMemAccTooLarge, int &LicmMssaOptCounter,
                        OptimizationRemarkEmitter *ORE) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
-         CurLoop != nullptr && SafetyInfo != nullptr &&
+         CurLoop != nullptr && SafetyInfo != nullptr && LAI != nullptr &&
          "Unexpected input to hoistRegion.");
   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
          "Either AliasSetTracker or MemorySSA should be initialized.");
@@ -828,6 +845,32 @@
         continue;
       }
 
+      // If it's a malloc/free pair, try hoisting it out to the preheader. We
+      // don't need to worry about intervening unwinding function calls in this
+      // case, so we can be more aggressive than for generic instructions.
+      if (CurLoop->hasLoopInvariantOperands(&I) &&
+          llvm::isAllocationFn(&I, TLI) && LAI->mayHoist(cast<CallInst>(&I))) {
+        IRBuilder<>(&I).CreateLifetimeStart(&I);
+        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+              MSSAU, ORE);
+        HoistedInstructions.push_back(&I);
+        Instruction *Pattern = LAI->toSink(cast<CallInst>(&I))[0];
+        for (auto ExitBlock : LAI->addDeallocations(cast<CallInst>(&I))) {
+          Instruction *Clone = Pattern->clone();
+          Clone->dropUnknownNonDebugMetadata();
+          ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(),
+                                          Clone);
+        }
+
+        for (auto FreeCall : LAI->toSink(cast<CallInst>(&I))) {
+          IRBuilder<>(FreeCall).CreateLifetimeEnd(&I);
+          eraseInstruction(*FreeCall, *SafetyInfo, CurAST, MSSAU);
+        }
+
+        Changed = true;
+        continue;
+      }
+
       // Attempt to remove floating point division out of the loop by
       // converting it to a reciprocal multiplication.
       if (I.getOpcode() == Instruction::FDiv &&
Index: llvm/test/Transforms/LICM/allocs.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/allocs.ll
@@ -0,0 +1,347 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -S -enable-mssa-loop-dependency -licm < %s | FileCheck %s
+
+declare i8* @malloc(i32)
+declare void @free(i8* nocapture)
+
+declare void @use(i8*)
+declare i1 @expr()
+
+define void @test1(i32 %loop_invariant) {
+; CHECK-LABEL: @test1
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @mightmalloc()
+
+define void @test2(i32 %loop_invariant) {
+; CHECK-LABEL: @test2
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+; CHECK: @mightmalloc
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  call void @mightmalloc()
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+define void @test3(i32 %loop_invariant) {
+; CHECK-LABEL: @test3
+header:
+; CHECK-LABEL: header:
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @mightmalloc
+; CHECK: @malloc
+; CHECK: @use
+; CHECK: @free
+  call void @mightmalloc()
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK-NOT: @free
+  ret void
+}
+
+define void @test4(i32 %loop_invariant) {
+; CHECK-LABEL: @test4
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @use
+; CHECK: @use
+; CHECK: @use
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr1 = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = call i8* @malloc(i32 %loop_invariant)
+  %ptr3 = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr1)
+  call void @use(i8* %ptr2)
+  call void @use(i8* %ptr3)
+  call void @free(i8* %ptr1)
+  call void @free(i8* %ptr2)
+  call void @free(i8* %ptr3)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+; CHECK: @free
+; CHECK: @free
+  ret void
+}
+
+define void @test5(i32 %loop_invariant) {
+; CHECK-LABEL: @test5
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @llvm.lifetime.start
+; CHECK: @use
+; CHECK: @use
+; CHECK: @use
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %ptr3 = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = call i8* @malloc(i32 %loop_invariant)
+  %ptr1 = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr1)
+  call void @use(i8* %ptr2)
+  call void @use(i8* %ptr3)
+  call void @free(i8* %ptr1)
+  call void @free(i8* %ptr2)
+  call void @free(i8* %ptr3)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+; CHECK: @free
+; CHECK: @free
+  ret void
+}
+
+define void @test6(i32 %loop_invariant) {
+; CHECK-LABEL: @test6
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: @malloc
+; CHECK-NOT: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: @llvm.lifetime.start
+; CHECK: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK: @use
+; CHECK: @use
+; CHECK: @use
+; CHECK: @llvm.lifetime.end
+; CHECK: @free
+; CHECK: @llvm.lifetime.end
+  %i = phi i32 [%loop_invariant, %header], [%i.1, %loop]
+  %ptr1 = call i8* @malloc(i32 %loop_invariant)
+  %ptr2 = call i8* @malloc(i32 %i)
+  %ptr3 = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr1)
+  call void @use(i8* %ptr2)
+  call void @use(i8* %ptr3)
+  call void @free(i8* %ptr1)
+  call void @free(i8* %ptr2)
+  call void @free(i8* %ptr3)
+  %i.1 = add i32 %i, 1
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+; CHECK: @free
+; CHECK-NOT: @free
+; CHECK: ret void
+  ret void
+}
+
+define void @test7(i32 %loop_invariant) {
+; CHECK-LABEL: @test7
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: @use
+; CHECK-NOT: @free
+; CHECK-NOT: @llvm.lifetime.end
+  %i = phi i32 [%loop_invariant, %header], [%i.1, %loop.2], [%i.1, %loop.3]
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  call void @use(i8* %ptr)
+  %i.1 = add i32 %i, 1
+  %cmp = icmp eq i32 %i.1, 123
+  br i1 %cmp, label %loop.2, label %loop.3
+
+loop.2:
+; CHECK-LABEL: loop.2:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %continue.1 = call i1 @expr()
+  call void @free(i8* %ptr)
+  br i1 %continue.1, label %loop, label %loopexit
+
+loop.3:
+; CHECK-LABEL: loop.3:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  %continue.2 = call i1 @expr()
+  call void @free(i8* %ptr)
+  br i1 %continue.2, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @assert_fail()
+
+define void @test8(i32 %loop_invariant) {
+; CHECK-LABEL: @test8
+header:
+; CHECK-LABEL: header:
+; CHECK: @malloc
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+  %ptr = call i8* @malloc(i32 %loop_invariant)
+  %fail = call i1 @expr()
+  br i1 %fail, label %assert_fail, label %assert_pass
+
+assert_fail:
+  call void @assert_fail()
+  unreachable
+
+assert_pass:
+; CHECK-LABEL: assert_pass:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}
+
+declare void @usei64(i64)
+
+define void @test9(i32 %loop_invariant) {
+; CHECK-LABEL: @test9
+header:
+; CHECK-LABEL: header:
+; CHECK: add
+; CHECK: @malloc
+; CHECK: ptrtoint
+; CHECK: and
+; CHECK-NOT: usei64
+; CHECK: br label %loop
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NOT: @malloc
+; CHECK: @llvm.lifetime.start
+; CHECK-NOT: @malloc
+; CHECK: usei64
+  %size = add i32 %loop_invariant, 1
+  %ptr = call i8* @malloc(i32 %size)
+  %ptrbits = ptrtoint i8* %ptr to i64
+  %aligncheck = and i64 %ptrbits, 4
+  call void @usei64(i64%aligncheck)
+  %fail = call i1 @expr()
+  br i1 %fail, label %assert_fail, label %assert_pass
+
+assert_fail:
+  call void @assert_fail()
+  unreachable
+
+assert_pass:
+; CHECK-LABEL: assert_pass:
+; CHECK-NOT: @free
+; CHECK: @llvm.lifetime.end
+; CHECK-NOT: @free
+  call void @free(i8* %ptr)
+  br i1 undef, label %loop, label %loopexit
+
+loopexit:
+; CHECK-LABEL: loopexit:
+; CHECK: @free
+  ret void
+}