diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -260,6 +260,16 @@
     return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
   }
 
+  void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false,
+                         Optional<AliasResult> AR = MayAlias) {
+    if (!Optimized) {
+      setOperand(0, DMA);
+      return;
+    }
+    setOptimized(DMA);
+    setOptimizedAccessType(AR);
+  }
+
   // Sadly, these have to be public because they are needed in some of the
   // iterators.
   inline bool isOptimized() const;
@@ -296,16 +306,6 @@
     OptimizedAccessAlias = AR;
   }
 
-  void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false,
-                         Optional<AliasResult> AR = MayAlias) {
-    if (!Optimized) {
-      setOperand(0, DMA);
-      return;
-    }
-    setOptimized(DMA);
-    setOptimizedAccessType(AR);
-  }
-
 private:
   Instruction *MemoryInstruction;
   Optional<AliasResult> OptimizedAccessAlias;
@@ -786,6 +786,7 @@
   /// Used in various insertion functions to specify whether we are talking
   /// about the beginning or end of a block.
   enum InsertionPlace { Beginning, End };
+  MemoryPhi *createMemoryPhi(BasicBlock *BB);
 
 protected:
   // Used by Memory SSA annotater, dumpers, and wrapper pass
@@ -854,7 +855,6 @@
   determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks);
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
-  MemoryPhi *createMemoryPhi(BasicBlock *BB);
   template <typename AliasAnalysisType>
   MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
                                   const MemoryUseOrDef *Template = nullptr);
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -386,9 +386,13 @@
 
   // This ends the loop pass pipelines.
 
   if (OptLevel > 1) {
+    if (EnableGVNHoist)
+      MPM.add(createGVNHoistPass());
     MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
     MPM.add(NewGVN ? createNewGVNPass()
                    : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+    if (EnableGVNHoist)
+      MPM.add(createGVNHoistPass());
   }
   MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
   MPM.add(createSCCPPass());      // Constant prop with SCCP
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -31,6 +31,49 @@
 // is disabled in the following cases.
 // 1. Scalars across calls.
 // 2. geps when corresponding load/store cannot be hoisted.
+//
+// TODO:
+// For -Oz scalars are always safe to hoist => NO.
+// For -O2/-O3 hoist only when the live range improves or remains the same.
+// If we haven't computed dominator tree levels, do so now.
+// For the sink operation anything after a hoist barrier is okay to sink,
+// but nothing before the hoist barrier. See Danny's patch on HoistBarriers.
+//
+// Find sink opportunities: sink instructions to a common post dominator when
+// they compute the same value and hoisting isn't profitable because it would
+// increase the live range. In that case sinking may decrease the live range
+// when the instruction can be delayed. But do not delay so much that the
+// latency of the operation dominates. For example, a divide may take 10
+// cycles; if it is sunk into the BB where its result is used, the result
+// becomes available too late.
+// An estimated latency of the operation should therefore be computed, and
+// the instruction should be sunk only up to the point where the result is
+// still readily available for its user.
+//
+// Sinking a store may not always be beneficial: sinking frees up a register
+// and so improves register allocation, but the stored result may be loaded
+// again; in that case it is better to store early (still an open question).
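+//
+// A sketch of the sink opportunity targeted here (hypothetical IR, block and
+// value names are illustrative only):
+//
+// left:
+//   %a0 = add i32 %x, %y
+//   br label %join
+// right:
+//   %a1 = add i32 %x, %y
+//   br label %join
+// join:
+//   %p = phi i32 [ %a0, %left ], [ %a1, %right ]
+//
+// Both adds compute the same value number and their only user is the phi in
+// the common successor, so they can be sunk into %join and the phi replaced
+// by a single add.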
+//
+// TODO: Hoist from >2 successors. Currently GVNHoist will not hoist stores
+// in this case because it works on two instructions at a time.
+// entry:
+//   switch i32 %c1, label %exit1 [
+//     i32 0, label %sw0
+//     i32 1, label %sw1
+//   ]
+//
+// sw0:
+//   store i32 1, i32* @G
+//   br label %exit
+//
+// sw1:
+//   store i32 1, i32* @G
+//   br label %exit
+//
+// exit1:
+//   store i32 1, i32* @G
+//   ret void
+// exit:
+//   ret void
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
@@ -47,7 +90,6 @@
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/PostDominators.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -72,6 +114,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <chrono>
 #include <iterator>
+#include <map>
+#include <set>
@@ -84,6 +127,7 @@
 #define DEBUG_TYPE "gvn-hoist"
 
 STATISTIC(NumHoisted, "Number of instructions hoisted");
+STATISTIC(NumSunk, "Number of instructions sunk");
 STATISTIC(NumRemoved, "Number of instructions removed");
 STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
 STATISTIC(NumLoadsRemoved, "Number of loads removed");
@@ -91,6 +135,7 @@
 STATISTIC(NumStoresRemoved, "Number of stores removed");
 STATISTIC(NumCallsHoisted, "Number of calls hoisted");
 STATISTIC(NumCallsRemoved, "Number of calls removed");
+// STATISTIC(NumBarriers, "Number of barriers");
 
 static cl::opt<int>
     MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
@@ -108,15 +153,28 @@
              "maximum specified depth (default = 100, unlimited = -1)"));
 
 static cl::opt<int>
-    MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10),
-                   cl::desc("Maximum length of dependent chains to hoist "
-                            "(default = 10, unlimited = -1)"));
+    MaxHoistChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10),
+                        cl::desc("Maximum length of dependent chains to hoist "
+                                 "(default = 10, unlimited = -1)"));
+
+static cl::opt<int>
+    MaxSinkChainLength("gvn-hoist-max-sink-chain", cl::Hidden, cl::init(3),
+                       cl::desc("Maximum length of dependent chains to sink "
+                                "(default = 3, unlimited = -1)"));
+
+static cl::opt<bool> CheckHoistProfitability(
+    "gvn-hoist-check-profitability", cl::Hidden, cl::init(true),
+    cl::desc("Check for profitability (reducing register pressure)"));
+
+static cl::opt<bool> CheckSinkProfitability(
+    "gvn-sink-check-profitability", cl::Hidden, cl::init(true),
+    cl::desc("Check for profitability (reducing register pressure)"));
 
 namespace llvm {
 
 using BBSideEffectsSet = DenseMap<const BasicBlock *, bool>;
 using SmallVecInsn = SmallVector<Instruction *, 4>;
 using SmallVecImplInsn = SmallVectorImpl<Instruction *>;
+using SmallVecVal = SmallVector<Value *, 4>;
+using SmallVecImplVal = SmallVectorImpl<Value *>;
 
 // Each element of a hoisting list contains the basic block where to hoist and
 // a list of instructions to be hoisted.
@@ -129,6 +187,14 @@
 using VNtoInsns = DenseMap<VNType, SmallVector<Instruction *, 4>>;
 
+typedef SmallSet<BasicBlock *, 2> SmallSetBB;
+typedef DenseMap<BasicBlock *, SmallSetBB> MergeSetT;
+typedef SmallVector<BasicBlock *, 4> SmallVecBB;
+typedef SmallVecBB BBLevelKeyT;
+typedef std::map<unsigned, BBLevelKeyT> BBLevelT;
+typedef DenseMap<BasicBlock *, unsigned> DomLevelsT;
+typedef std::pair<BasicBlock *, BasicBlock *> EdgeT;
+
 // CHI keeps information about values flowing out of a basic block. It is
 // similar to PHI but in the inverse graph, and used for outgoing values on each
 // edge. For conciseness, it is computed only for instructions with multiple
@@ -157,6 +223,7 @@
 using OutValuesType = DenseMap<BasicBlock *, SmallVector<CHIArg, 2>>;
 using InValuesType =
     DenseMap<BasicBlock *, SmallVector<std::pair<VNType, Instruction *>, 2>>;
+typedef DenseMap<VNType, SmallVector<Instruction *, 2>> RenameStackType;
 
 // An invalid value number Used when inserting a single value number into
 // VNtoInsns.
@@ -174,7 +241,11 @@
     VNtoScalars[{V, InvalidVN}].push_back(I);
   }
 
+  void clear() { VNtoScalars.clear(); }
+
+  VNtoInsns &getVNTable() { return VNtoScalars; }
   const VNtoInsns &getVNTable() const { return VNtoScalars; }
+  void removeVN(unsigned V) { VNtoScalars.erase({V, InvalidVN}); }
 };
 
 // Records all load instructions candidate for code hoisting.
@@ -190,7 +261,10 @@
     }
   }
 
+  void clear() { VNtoLoads.clear(); }
+  VNtoInsns &getVNTable() { return VNtoLoads; }
   const VNtoInsns &getVNTable() const { return VNtoLoads; }
+  void removeVN(unsigned V) { VNtoLoads.erase({V, InvalidVN}); }
 };
 
 // Records all store instructions candidate for code hoisting.
@@ -209,7 +283,11 @@
     VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store);
   }
 
+  void clear() { VNtoStores.clear(); }
+
+  VNtoInsns &getVNTable() { return VNtoStores; }
   const VNtoInsns &getVNTable() const { return VNtoStores; }
+  void removeVN(unsigned V) { VNtoStores.erase({V, InvalidVN}); }
 };
 
 // Records all call instructions candidate for code hoisting.
@@ -235,32 +313,196 @@
     VNtoCallsStores[Entry].push_back(Call);
   }
 
+  void clear() {
+    VNtoCallsScalars.clear();
+    VNtoCallsLoads.clear();
+    VNtoCallsStores.clear();
+  }
+
+  VNtoInsns &getScalarVNTable() { return VNtoCallsScalars; }
+  VNtoInsns &getLoadVNTable() { return VNtoCallsLoads; }
+  VNtoInsns &getStoreVNTable() { return VNtoCallsStores; }
   const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
   const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
   const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
+
+  void removeVN(unsigned V) {
+    if (!VNtoCallsScalars.erase({V, InvalidVN}))
+      if (!VNtoCallsLoads.erase({V, InvalidVN}))
+        VNtoCallsStores.erase({V, InvalidVN});
+  }
 };
 
 static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
-  static const unsigned KnownIDs[] = {
-      LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
-      LLVMContext::MD_noalias, LLVMContext::MD_range,
-      LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
-      LLVMContext::MD_invariant_group, LLVMContext::MD_access_group};
+  static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+                                      LLVMContext::MD_alias_scope,
+                                      LLVMContext::MD_noalias,
+                                      LLVMContext::MD_range,
+                                      LLVMContext::MD_fpmath,
+                                      LLVMContext::MD_invariant_load,
+                                      LLVMContext::MD_invariant_group,
+                                      LLVMContext::MD_access_group};
   combineMetadata(ReplInst, I, KnownIDs, true);
 }
 
+void printBBLevels(const BBLevelT &BBLevels) {
+  for (const std::pair<const unsigned, SmallVecBB> &P : BBLevels) {
+    dbgs() << "\nLevel: " << P.first << "\n";
+    for (const BasicBlock *BB : P.second)
+      dbgs() << *BB << "\n";
+  }
+}
+
+void printMergeSet(const MergeSetT &M) {
+  // For printing in a deterministic order.
+  typedef std::set<const BasicBlock *> SetConstBB;
+  auto cmp = [](const BasicBlock *A, const BasicBlock *B) {
+    return A->getName() < B->getName();
+  };
+  std::map<const BasicBlock *, SetConstBB, decltype(cmp)> PrintM(cmp);
+  for (const std::pair<BasicBlock *, SmallSetBB> &P : M) {
+    for (const BasicBlock *BB : P.second)
+      PrintM[P.first].insert(BB);
+  }
+  for (const auto &P : PrintM) {
+    dbgs() << "\nMergeSet of: " << P.first->getName() << ": ";
+    for (const BasicBlock *BB : P.second)
+      dbgs() << BB->getName() << ", ";
+  }
+}
+
+void printJEdges(const DenseSet<EdgeT> &Edges) {
+  // Compare both endpoints so that distinct edges never compare equal.
+  auto cmp = [](const EdgeT &A, const EdgeT &B) {
+    return std::make_pair(A.first->getName(), A.second->getName()) <
+           std::make_pair(B.first->getName(), B.second->getName());
+  };
+  // For printing in a deterministic order.
+  std::set<EdgeT, decltype(cmp)> PrintE(Edges.begin(), Edges.end(), cmp);
+
+  for (const EdgeT &E : PrintE)
+    dbgs() << "\nFound a JEdge: " << E.first->getName() << " -> "
+           << E.second->getName();
+}
+
+void printSmallSet(SmallSetBB &S) {
+  dbgs() << "\nPrinting SmallSet: ";
+  for (const auto &BB : S)
+    dbgs() << BB->getName() << ",";
+}
+
+static inline void updateLocalStats(const Instruction *Repl, unsigned &NI,
+                                    unsigned &NL, unsigned &NS, unsigned &NC) {
+  if (isa<LoadInst>(Repl))
+    ++NL;
+  else if (isa<StoreInst>(Repl))
+    ++NS;
+  else if (isa<CallInst>(Repl))
+    ++NC;
+  else // Scalar
+    ++NI;
+}
+
+static inline void updateHoistStats(unsigned NI, unsigned NL, unsigned NS,
+                                    unsigned NC, unsigned NR) {
+  NumHoisted += NL + NS + NC + NI;
+  NumRemoved += NR;
+  NumLoadsHoisted += NL;
+  NumStoresHoisted += NS;
+  NumCallsHoisted += NC;
+}
+
 // This pass hoists common computations across branches sharing common
 // dominator. The primary goal is to reduce the code size, and in some
 // cases reduce critical path (by exposing more ILP).
 class GVNHoist {
 public:
+  // Each element of a hoisting list contains the basic block where to hoist
+  // and a list of instructions to be hoisted.
+  typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
+  typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
+
   GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
            MemoryDependenceResults *MD, MemorySSA *MSSA)
       : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
-        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
+        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)),
+        HoistingGeps(false) {
+    clearVNTables();
+  }
+
+  void clearVNTables() {
+    II.clear();
+    LI.clear();
+    SI.clear();
+    CI.clear();
+  }
+
+  // DomLevels maps from BB -> its depth from the root.
+  // JEdges only contain the J edges as D edges are available in the Dominator
+  // Tree. BBLevels maps each depth in the CFG to all the Basic Blocks at that
+  // level. The DJ Graph is described in "Sreedhar, Vugranam C. Efficient
+  // program analysis using DJ graphs. McGill University, 1996".
+  void constructDJGraph(DomLevelsT &DomLevels, DenseSet<EdgeT> &JEdges,
+                        BBLevelT &BBLevels);
+
+  // Return true if S1 is a subset of S2.
+  bool isSubset(const SmallSetBB &S1, const SmallSetBB &S2) {
+    if (S1.size() > S2.size())
+      return false;
+    for (BasicBlock *BB : S1) {
+      if (!S2.count(BB))
+        return false;
+    }
+    return true;
+  }
+
+  // Returns true when A executes before B.
+  bool DFSInOrder(const Instruction *A, const Instruction *B) const {
+    const BasicBlock *BA = A->getParent();
+    const BasicBlock *BB = B->getParent();
+    unsigned ADFS, BDFS;
+    if (BA == BB) {
+      ADFS = DFSNumber.lookup(A);
+      BDFS = DFSNumber.lookup(B);
+    } else {
+      ADFS = DFSNumber.lookup(BA);
+      BDFS = DFSNumber.lookup(BB);
+    }
+    assert(ADFS && BDFS);
+    return ADFS < BDFS;
+  }
+
+  // DomLevels maps BB to its depth from the root.
+  // JEdges only contain the J-edges as D-edges are available in the Dominator
+  // Tree. BBLevels maps each depth in the CFG to all the BBs at that level.
+  // BILARDI, G. AND PINGALI, K. 2003. Algorithms for computing the static
+  // single assignment form. J. ACM 50, 3 (May), 375-425.
+  bool constructMergeSet(DomLevelsT &DomLevels, DenseSet<EdgeT> &JEdges,
+                         BBLevelT &BBLevels);
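+  // Illustrative example (not computed by this code verbatim): for a diamond
+  // CFG A -> {B, C} -> D, the D-edges (dominator-tree edges) are A->B, A->C
+  // and A->D, and the J-edges are B->D and C->D. The merge set of B (and of
+  // C) is then {D}: a value flowing out of B can only merge with another
+  // definition at D.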
+
+  // Returns the last user of \p Val with respect to \p I; used to tell
+  // whether \p Val dies at \p I.
+  // TODO: Find an O(1) algorithm for this.
+  const Instruction *lastUser(const Instruction *I, const Value *Val) const;
+
+  // Returns true if \p Val is live-out from \p BB.
+  bool isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const;
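+  // Sketch of the idea (illustrative, not the exact query): in a diamond
+  // A -> {B, C} -> D, the merge set of B is {D}. A value defined above the
+  // branch and used in D reaches D through B's merge set, so a merge-set
+  // lookup from a use block can answer liveness without running a per-value
+  // data-flow analysis.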
 
   bool run(Function &F) {
+    if (F.hasMinSize()) {
+      CheckHoistProfitability = false;
+      CheckSinkProfitability = false;
+    }
     NumFuncArgs = F.arg_size();
+    // DT->recalculate(F);
+    DT->updateDFSNumbers();
+    DomLevelsT DomLevels;
+    DenseSet<EdgeT> JEdges;
+    BBLevelT BBLevels;
+    constructDJGraph(DomLevels, JEdges, BBLevels);
+    // printBBLevels(BBLevels);
+    LLVM_DEBUG(printJEdges(JEdges));
+    while (constructMergeSet(DomLevels, JEdges, BBLevels))
+      ;
+    LLVM_DEBUG(printMergeSet(MergeSet));
+
     VN.setDomTree(DT);
     VN.setAliasAnalysis(AA);
     VN.setMemDep(MD);
@@ -278,12 +520,13 @@
 
     // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
     while (true) {
-      if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
+      if (MaxHoistChainLength != -1 && ++ChainLength >= MaxHoistChainLength)
         return Res;
 
+      clearVNTables();
       auto HoistStat = hoistExpressions(F);
       if (HoistStat.first + HoistStat.second == 0)
-        return Res;
+        break;
 
       if (HoistStat.second > 0)
         // To address a limitation of the current GVN, we need to rerun the
@@ -294,6 +537,8 @@
       Res = true;
     }
 
+    clearVNTables();
+    sinkExpressions(F);
     return Res;
   }
 
@@ -335,808 +580,1511 @@
   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
   DenseMap<const Value *, unsigned> DFSNumber;
   BBSideEffectsSet BBSideEffects;
-  DenseSet<const BasicBlock *> HoistBarrier;
   SmallVector<BasicBlock *, 4> IDFBlocks;
   unsigned NumFuncArgs;
   const bool HoistingGeps = false;
+  MergeSetT MergeSet;
+  DenseSet<const BasicBlock *> HoistBarrier;
+  InsnInfo II;
+  LoadInfo LI;
+  StoreInfo SI;
+  CallInfo CI;
 
   enum InsKind { Unknown, Scalar, Load, Store };
 
+  bool hoistCandidate(const User *U) const {
+    if (!VN.exists(const_cast<User *>(U))) // Only for scalars.
+      return false;
+    unsigned V = VN.lookup(const_cast<User *>(U));
+
+    VNType L({V, InvalidVN});
+
+    // Note: DenseMap::count() only returns 0 or 1, so test the size of the
+    // instruction list recorded for this VN instead.
+    // Multiple scalars with the same VN have a very high chance of being
+    // hoisted.
+    if (II.getVNTable().lookup(L).size() > 1)
+      return true;
+
+    // Multiple loads with the same VN have a very high chance of being
+    // hoisted.
+    if (LI.getVNTable().lookup(L).size() > 1)
+      return true;
+
+    // Multiple stores with the same VN have a very high chance of being
+    // hoisted.
+    if (SI.getVNTable().lookup(L).size() > 1)
+      return true;
+
+    // Same for calls, in each of the three call tables.
+    if (CI.getLoadVNTable().lookup(L).size() > 1)
+      return true;
+
+    if (CI.getScalarVNTable().lookup(L).size() > 1)
+      return true;
+
+    if (CI.getStoreVNTable().lookup(L).size() > 1)
+      return true;
+    return false;
+  }
+
-  // Return true when there are exception handling in BB.
-  bool hasEH(const BasicBlock *BB) {
-    auto It = BBSideEffects.find(BB);
-    if (It != BBSideEffects.end())
-      return It->second;
-
-    if (BB->isEHPad() || BB->hasAddressTaken()) {
-      BBSideEffects[BB] = true;
-      return true;
-    }
-
-    if (BB->getTerminator()->mayThrow()) {
-      BBSideEffects[BB] = true;
-      return true;
-    }
-
-    BBSideEffects[BB] = false;
-    return false;
-  }
-
-  // Return true when a successor of BB dominates A.
-  bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
-    for (const BasicBlock *Succ : successors(BB))
-      if (DT->dominates(Succ, A))
-        return true;
-
-    return false;
-  }
-
-  // Return true when I1 appears before I2 in the instructions of BB.
-  bool firstInBB(const Instruction *I1, const Instruction *I2) {
-    assert(I1->getParent() == I2->getParent());
-    unsigned I1DFS = DFSNumber.lookup(I1);
-    unsigned I2DFS = DFSNumber.lookup(I2);
-    assert(I1DFS && I2DFS);
-    return I1DFS < I2DFS;
-  }
+  bool successorDominate(const BasicBlock *BB, const BasicBlock *A);
+
+  // Return true when there is exception handling in BB.
+  bool hasEH(const BasicBlock *BB);
+
+  bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
+                   int &NBBsOnAllPaths);
+
+  // Return true when there is exception handling or a load of memory Def
+  // between Def and NewPt. This function is only called for stores: Def is
+  // the MemoryDef of the store to be hoisted.
+  //
+  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+  // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+  // initialized to -1 which is unlimited.
+  bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+                          int &NBBsOnAllPaths);
+
+  // Return true when there is exception handling between HoistPt and BB.
+  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+  // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+  // initialized to -1 which is unlimited.
+  bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
+                   int &NBBsOnAllPaths);
 
   // Return true when there are memory uses of Def in BB.
   bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
-                    const BasicBlock *BB) {
-    const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
-    if (!Acc)
-      return false;
-
-    Instruction *OldPt = Def->getMemoryInst();
-    const BasicBlock *OldBB = OldPt->getParent();
-    const BasicBlock *NewBB = NewPt->getParent();
-    bool ReachedNewPt = false;
-
-    for (const MemoryAccess &MA : *Acc)
-      if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
-        Instruction *Insn = MU->getMemoryInst();
-
-        // Do not check whether MU aliases Def when MU occurs after OldPt.
-        if (BB == OldBB && firstInBB(OldPt, Insn))
-          break;
-
-        // Do not check whether MU aliases Def when MU occurs before NewPt.
-        if (BB == NewBB) {
-          if (!ReachedNewPt) {
-            if (firstInBB(Insn, NewPt))
-              continue;
-            ReachedNewPt = true;
-          }
-        }
-        if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
-          return true;
-      }
-
-    return false;
-  }
+                    const BasicBlock *BB);
+
+  bool firstInBB(const Instruction *I1, const Instruction *I2) const;
+
+  // Return true when it is safe to hoist scalar instructions from all blocks
+  // in WL to HoistBB. TODO: Hoist scalars past a call only when the
+  // interference does not increase.
+  bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB,
+                         int &NBBsOnAllPaths);
+
+  // Return true when it is safe to hoist a memory load or store U from OldPt
+  // to NewPt.
+  bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
+                       MemoryUseOrDef *MA, InsKind K, int &NBBsOnAllPaths);
+  bool safeToHoistLdSt_0(const Instruction *NewHoistPt,
+                         const Instruction *HoistPt, const Instruction *Insn,
+                         MemoryUseOrDef *MA, GVNHoist::InsKind K,
+                         int &NumBBsOnAllPaths, const BasicBlock *HoistPtBB,
+                         const BasicBlock *NewHoistBB,
+                         const BasicBlock *InsnBB,
+                         SmallPtrSetImpl<const BasicBlock *> &WL);
+
+  // Return true when it is profitable to hoist I.
+  bool profitableToHoist(Instruction *I) const;
+  bool profitableToHoist(CHIArgs C) const;
+  // Return true when it is profitable to sink I.
+  bool profitableToSink(Instruction *I) const;
+
+  bool safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K);
+
+  // Find sinkable instructions.
+  void findSinkableInsn(VNtoInsns &Map, HoistingPointList &HPL, InsKind K);
+
+  bool valueAnticipable(CHIArgs C, Instruction *TI) const;
+  void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
+                   SmallVectorImpl<CHIArg> &Safe);
+
+  // Push all the VNs corresponding to BB into RenameStack.
+  void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
+                       RenameStackType &RenameStack);
+
+  void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
+                   RenameStackType &RenameStack);
+
+  void insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs);
+
+  void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
+                               HoistingPointList &HPL);
+
+  void computeInsertionPoints(VNtoInsns &Map, HoistingPointList &HPL,
+                              InsKind K);
+
+  // Return true when all operands of Instr are available at insertion point
+  // HoistPt. When limiting the number of hoisted expressions, one could hoist
+  // a load without hoisting its access function. So before hoisting any
+  // expression, make sure that all its operands are available at insert point.
+  bool allOperandsAvailable(Instruction *I, BasicBlock *NewDest,
+                            SmallVecImplVal &Unav) const;
+  // Same as allOperandsAvailable with recursive check for GEP operands.
+  bool allGepOperandsAvailable(const Instruction *I,
+                               const BasicBlock *HoistPt) const;
+
+  // Make all operands of the GEP available.
+  void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                         const SmallVecInsn &InstructionsToHoist,
+                         Instruction *Gep) const;
+
+  // In the case Repl is a load or a store, we make all their GEPs
+  // available: GEPs are not hoisted by default to avoid the address
+  // computations to be hoisted without the associated load or store.
+  bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                                const SmallVecInsn &InstructionsToHoist) const;
+
+  void updateAlignment(Instruction *I, Instruction *Repl);
+
+  void removeAndReplace(const SmallVecInsn &InstructionsToHoist,
+                        Instruction *Repl);
+  void removeAndReplace(Instruction *I, Instruction *Repl,
+                        MemoryAccess *NewMemAcc);
+
+  // Remove and rename all instructions other than Repl.
+  unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
+                            BasicBlock *DestBB, bool MoveAccess);
+
+  void removeMPhi(MemoryAccess *NewMemAcc);
+  unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
+                MemoryUseOrDef *NewMemAcc);
+  void raMPHIuw(MemoryUseOrDef *NewMemAcc);
+
+  std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL);
+  std::pair<unsigned, unsigned> sink(HoistingPointList &HPL);
+  bool getVN(Instruction *I);
+  void barriersAndVNs(Function &F, bool Sinking);
+  // Hoist all expressions. Returns the number of scalars hoisted and the
+  // number of non-scalars hoisted.
+  std::pair<unsigned, unsigned> hoistExpressions(Function &F);
+  std::pair<unsigned, unsigned> sinkExpressions(Function &F);
+};
+
+// Return true when there is exception handling in BB.
+bool GVNHoist::hasEH(const BasicBlock *BB) {
+  auto It = BBSideEffects.find(BB);
+  if (It != BBSideEffects.end())
+    return It->second;
+
+  if (BB->isEHPad() || BB->hasAddressTaken()) {
+    BBSideEffects[BB] = true;
+    return true;
+  }
+
+  if (BB->getTerminator()->mayThrow()) {
+    BBSideEffects[BB] = true;
+    return true;
+  }
+
+  BBSideEffects[BB] = false;
+  return false;
+}
+
+// Return true when a successor of BB dominates A.
+bool GVNHoist::successorDominate(const BasicBlock *BB, const BasicBlock *A) {
+  for (const BasicBlock *Succ : successors(BB))
+    if (DT->dominates(Succ, A))
+      return true;
+
+  return false;
+}
+
+// Return true when I1 appears before I2 in the instructions of BB.
+bool GVNHoist::firstInBB(const Instruction *I1, const Instruction *I2) const {
+  assert(I1->getParent() == I2->getParent());
+  unsigned I1DFS = DFSNumber.lookup(I1);
+  unsigned I2DFS = DFSNumber.lookup(I2);
+  assert(I1DFS && I2DFS);
+  return I1DFS < I2DFS;
+}
+
+// Return true when there are memory uses of Def in BB.
+bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
+                            const BasicBlock *BB) {
+  const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+  if (!Acc)
+    return false;
+
+  Instruction *OldPt = Def->getMemoryInst();
+  const BasicBlock *OldBB = OldPt->getParent();
+  const BasicBlock *NewBB = NewPt->getParent();
+  bool ReachedNewPt = false;
+
+  for (const MemoryAccess &MA : *Acc)
+    if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
+      Instruction *Insn = MU->getMemoryInst();
+
+      // Do not check whether MU aliases Def when MU occurs after OldPt.
+      if (BB == OldBB && firstInBB(OldPt, Insn))
+        break;
+
+      // Do not check whether MU aliases Def when MU occurs before NewPt.
+      if (BB == NewBB) {
+        if (!ReachedNewPt) {
+          if (firstInBB(Insn, NewPt))
+            continue;
+          ReachedNewPt = true;
+        }
+      }
+      if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
+        return true;
+    }
+
+  return false;
+}
+
-  bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
-                   int &NBBsOnAllPaths) {
-    // Stop walk once the limit is reached.
-    if (NBBsOnAllPaths == 0)
-      return true;
-
-    // Impossible to hoist with exceptions on the path.
-    if (hasEH(BB))
-      return true;
-
-    // No such instruction after HoistBarrier in a basic block was
-    // selected for hoisting so instructions selected within basic block with
-    // a hoist barrier can be hoisted.
-    if ((BB != SrcBB) && HoistBarrier.count(BB))
-      return true;
-
-    return false;
-  }
-
-  // Return true when there are exception handling or loads of memory Def
-  // between Def and NewPt. This function is only called for stores: Def is
-  // the MemoryDef of the store to be hoisted.
-
-  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
-  // return true when the counter NBBsOnAllPaths reaces 0, except when it is
-  // initialized to -1 which is unlimited.
-  bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
-                          int &NBBsOnAllPaths) {
-    const BasicBlock *NewBB = NewPt->getParent();
-    const BasicBlock *OldBB = Def->getBlock();
-    assert(DT->dominates(NewBB, OldBB) && "invalid path");
-    assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
-           "def does not dominate new hoisting point");
-
-    // Walk all basic blocks reachable in depth-first iteration on the inverse
-    // CFG from OldBB to NewBB. These blocks are all the blocks that may be
-    // executed between the execution of NewBB and OldBB. Hoisting an
-    // expression from OldBB into NewBB has to be safe on all execution paths.
-    for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
-      const BasicBlock *BB = *I;
-      if (BB == NewBB) {
-        // Stop traversal when reaching HoistPt.
-        I.skipChildren();
-        continue;
-      }
-
-      if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
-        return true;
-
-      // Check that we do not move a store past loads.
-      if (hasMemoryUse(NewPt, Def, BB))
-        return true;
-
-      // -1 is unlimited number of blocks on all paths.
-      if (NBBsOnAllPaths != -1)
-        --NBBsOnAllPaths;
-
-      ++I;
-    }
-
-    return false;
-  }
+bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
+                           int &NBBsOnAllPaths) {
+  // Stop walk once the limit is reached.
+  if (NBBsOnAllPaths == 0)
+    return true;
+
+  // Impossible to hoist with exceptions on the path.
+  if (hasEH(BB))
+    return true;
+
+  // No such instruction after HoistBarrier in a basic block was
+  // selected for hoisting so instructions selected within basic block with
+  // a hoist barrier can be hoisted.
+  if ((BB != SrcBB) && HoistBarrier.count(BB))
+    return true;
+
+  return false;
+}
+
+// Return true when there is exception handling or a load of memory Def
+// between Def and NewPt. This function is only called for stores: Def is
+// the MemoryDef of the store to be hoisted.
+//
+// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+// return true when the counter NBBsOnAllPaths reaches 0, except when it is
+// initialized to -1 which is unlimited.
+bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+                                  int &NBBsOnAllPaths) {
+  const BasicBlock *NewBB = NewPt->getParent();
+  const BasicBlock *OldBB = Def->getBlock();
+  assert(DT->dominates(NewBB, OldBB) && "invalid path");
+  assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
+         "def does not dominate new hoisting point");
+
+  // Walk all basic blocks reachable in depth-first iteration on the inverse
+  // CFG from OldBB to NewBB. These blocks are all the blocks that may be
+  // executed between the execution of NewBB and OldBB. Hoisting an expression
+  // from OldBB into NewBB has to be safe on all execution paths.
+  for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
+    const BasicBlock *BB = *I;
+    if (BB == NewBB) {
+      // Stop traversal when reaching HoistPt.
+      I.skipChildren();
+      continue;
+    }
+
+    if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
+      return true;
+
+    // Check that we do not move a store past loads.
+    if (hasMemoryUse(NewPt, Def, BB))
+      return true;
+
+    // -1 is unlimited number of blocks on all paths.
+    if (NBBsOnAllPaths != -1)
+      --NBBsOnAllPaths;
+
+    ++I;
+  }
+
+  return false;
+}
+
-  // Return true when there are exception handling between HoistPt and BB.
-  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
-  // return true when the counter NBBsOnAllPaths reaches 0, except when it is
-  // initialized to -1 which is unlimited.
-  bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
-                   int &NBBsOnAllPaths) {
-    assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
-
-    // Walk all basic blocks reachable in depth-first iteration on
-    // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
-    // blocks that may be executed between the execution of NewHoistPt and
-    // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be
-    // safe on all execution paths.
-    for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
-      const BasicBlock *BB = *I;
-      if (BB == HoistPt) {
-        // Stop traversal when reaching NewHoistPt.
-        I.skipChildren();
-        continue;
-      }
-
-      if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
-        return true;
-
-      // -1 is unlimited number of blocks on all paths.
-      if (NBBsOnAllPaths != -1)
-        --NBBsOnAllPaths;
-
-      ++I;
-    }
-
-    return false;
-  }
+// Return true when there is exception handling between HoistPt and BB.
+// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+// return true when the counter NBBsOnAllPaths reaches 0, except when it is
+// initialized to -1 which is unlimited.
+bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
+                           int &NBBsOnAllPaths) {
+  assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
+
+  // Walk all basic blocks reachable in depth-first iteration on
+  // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
+  // blocks that may be executed between the execution of NewHoistPt and
+  // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
+  // on all execution paths.
+  for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
+    const BasicBlock *BB = *I;
+    if (BB == HoistPt) {
+      // Stop traversal when reaching NewHoistPt.
+      I.skipChildren();
+      continue;
+    }
+
+    if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
+      return true;
+
+    // -1 is unlimited number of blocks on all paths.
+    if (NBBsOnAllPaths != -1)
+      --NBBsOnAllPaths;
+
+    ++I;
+  }
+
+  return false;
+}
+
-  // Return true when it is safe to hoist a memory load or store U from OldPt
-  // to NewPt.
-  bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
-                       MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
-    // In place hoisting is safe.
-    if (NewPt == OldPt)
-      return true;
-
-    const BasicBlock *NewBB = NewPt->getParent();
-    const BasicBlock *OldBB = OldPt->getParent();
-    const BasicBlock *UBB = U->getBlock();
-
-    // Check for dependences on the Memory SSA.
-    MemoryAccess *D = U->getDefiningAccess();
-    BasicBlock *DBB = D->getBlock();
-    if (DT->properlyDominates(NewBB, DBB))
-      // Cannot move the load or store to NewBB above its definition in DBB.
-      return false;
-
-    if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
-      if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
-        if (!firstInBB(UD->getMemoryInst(), NewPt))
-          // Cannot move the load or store to NewPt above its definition in D.
-          return false;
-
-    // Check for unsafe hoistings due to side effects.
-    if (K == InsKind::Store) {
-      if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
-        return false;
-    } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
-      return false;
-
-    if (UBB == NewBB) {
-      if (DT->properlyDominates(DBB, NewBB))
-        return true;
-      assert(UBB == DBB);
-      assert(MSSA->locallyDominates(D, U));
-    }
-
-    // No side effects: it is safe to hoist.
-    return true;
-  }
+// Return true when it is safe to hoist a memory load or store U from OldPt
+// to NewPt.
+bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt,
+                               const Instruction *OldPt, MemoryUseOrDef *U,
+                               InsKind K, int &NBBsOnAllPaths) {
+  // In place hoisting is safe.
+  if (NewPt == OldPt)
+    return true;
+
+  const BasicBlock *NewBB = NewPt->getParent();
+  const BasicBlock *OldBB = OldPt->getParent();
+  const BasicBlock *UBB = U->getBlock();
+
+  // Check for dependences on the Memory SSA.
+  MemoryAccess *D = U->getDefiningAccess();
+  BasicBlock *DBB = D->getBlock();
+  if (DT->properlyDominates(NewBB, DBB))
+    // Cannot move the load or store to NewBB above its definition in DBB.
+    return false;
+
+  if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
+    if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
+      if (!firstInBB(UD->getMemoryInst(), NewPt))
+        // Cannot move the load or store to NewPt above its definition in D.
+        return false;
+
+  // Check for unsafe hoistings due to side effects.
+  if (K == InsKind::Store) {
+    // U is known to be a store here, so use cast<> rather than dyn_cast<>,
+    // which could return null.
+    if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
+      return false;
+  } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
+    return false;
+
+  if (UBB == NewBB) {
+    if (DT->properlyDominates(DBB, NewBB))
+      return true;
+    assert(UBB == DBB);
+    assert(MSSA->locallyDominates(D, U));
+  }
+
+  // No side effects: it is safe to hoist.
+  return true;
+}
+
+// Return true when it is safe to hoist scalar instructions from all blocks
+// in WL to HoistBB.
+bool GVNHoist::safeToHoistScalar(const BasicBlock *HoistBB,
+                                 const BasicBlock *BB, int &NBBsOnAllPaths) {
+  return !hasEHOnPath(HoistBB, BB, NBBsOnAllPaths);
+}
+
+bool GVNHoist::safeToHoistLdSt_0(const Instruction *NewHoistPt,
+                                 const Instruction *HoistPt,
+                                 const Instruction *Insn, MemoryUseOrDef *MA,
+                                 GVNHoist::InsKind K, int &NumBBsOnAllPaths,
+                                 const BasicBlock *HoistPtBB,
+                                 const BasicBlock *NewHoistBB,
+                                 const BasicBlock *InsnBB,
+                                 SmallPtrSetImpl<const BasicBlock *> &WL) {
+  return (HoistPtBB == NewHoistBB || InsnBB == NewHoistBB
+          /*|| hoistingFromAllPaths(NewHoistBB, WL)*/) &&
+         // Also check that it is safe to move the load or store from HoistPt
+         // to NewHoistPt, and from Insn to NewHoistPt. Note that HoistPt may
+         // not be the instruction to be hoisted; it is a transient
+         // placeholder to find the farthest hoisting point when >2 hoistable
+         // candidates can be hoisted to a common dominator.
+         safeToHoistLdSt(NewHoistPt, HoistPt, MA, K, NumBBsOnAllPaths) &&
+         safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), K,
+                         NumBBsOnAllPaths);
+}
+
+bool GVNHoist::profitableToHoist(Instruction *I) const {
+  // For -O3/-O2 hoist only when the liveness decreases, i.e., no more than
+  // one operand can be a use without a kill.
+  // Stores and calls do not create a register def.
+  if (isa<StoreInst>(I) || isa<CallInst>(I))
+    return true;
+
+  // If Op is a kill then it will not be live-out from its basic block
+  // but the reverse is not true.
+  for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+    Value *Op = I->getOperand(op);
+    // if (isa<Constant>(Op))
+    //   continue;
+    if (isLiveOutUsingMergeSet(I->getParent(), Op))
+      return false;
+    // It is always profitable to hoist when the liveness does not increase;
+    // a kill will compensate for the def created by this instruction.
+    const Instruction *LU = lastUser(I, Op);
+    if (LU == I)
+      return true;
+    // We optimistically assume that if all the users of Op are hoistable
+    // candidates then it is profitable to hoist.
+    bool StillProfitable = true;
+    for (const User *U : Op->users()) {
+      if (!hoistCandidate(U)) {
+        StillProfitable = false;
+        break;
+      }
+      LLVM_DEBUG(dbgs() << "\nStill hoistable: " << *U);
+    }
+    if (StillProfitable)
+      return true;
+  }
+  return false;
+}
+
+bool GVNHoist::profitableToHoist(CHIArgs C) const {
+  if (!CheckHoistProfitability)
+    return true;
+  // Hoist only when at least half of the candidates are profitable.
+  int Count = 0;
+  for (const auto &A : C)
+    if (!profitableToHoist(A.I))
+      ++Count;
+  return 2 * Count <= size(C);
+}
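+
+// Illustrative note on the profitability model above: when hoisting
+// %a = add i32 %x, %y out of both arms of a diamond into the dominator,
+// if %x and %y die at the add, two live ranges end where one (%a) begins,
+// so register pressure does not increase. If %x and %y stay live across
+// the branch anyway, the hoisted %a only lengthens a live range, and the
+// hoist is not considered profitable.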
-  // Return true when it is safe to hoist scalar instructions from all blocks
-  // in WL to HoistBB.
-  bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB,
-                         int &NBBsOnAllPaths) {
-    return !hasEHOnPath(HoistBB, BB, NBBsOnAllPaths);
-  }
-
+bool GVNHoist::profitableToSink(Instruction *I) const {
+  if (!CheckSinkProfitability)
+    return true;
+  // For -O3/-O2 sink only when the liveness decreases, i.e., no more than
+  // one operand can be a kill.
+  // Stores and calls do not create a register def.
+  if (isa<StoreInst>(I) || isa<CallInst>(I))
+    return false;
+
+  // If Op is a kill then it will not be live-out from its basic block
+  // but the reverse is not true.
+  unsigned NumKills = 0;
+  for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+    Value *Op = I->getOperand(op);
+    // It is always profitable to sink when the liveness does not increase.
+    if (isLiveOutUsingMergeSet(I->getParent(), Op))
+      continue;
+    if (!isa<Constant>(Op))
+      ++NumKills;
+    // A kill will increase the liveness during a sink.
+    const Instruction *LU = lastUser(I, Op);
+    if (LU == I && NumKills > 1)
+      return false;
+  }
+  return true;
+}
+
-  // In the inverse CFG, the dominance frontier of basic block (BB) is the
-  // point where ANTIC needs to be computed for instructions which are going
-  // to be hoisted. Since this point does not change during gvn-hoist,
-  // we compute it only once (on demand).
-  // The ides is inspired from:
-  // "Partial Redundancy Elimination in SSA Form"
-  // ROBERT KENNEDY, SUN CHAN, SHIN-MING LIU, RAYMOND LO, PENG TU and FRED CHOW
-  // They use similar idea in the forward graph to find fully redundant and
-  // partially redundant expressions, here it is used in the inverse graph to
-  // find fully anticipable instructions at merge point (post-dominator in
-  // the inverse CFG).
-  // Returns the edge via which an instruction in BB will get the values from.
-
-  // Returns true when the values are flowing out to each edge.
-  bool valueAnticipable(CHIArgs C, Instruction *TI) const {
-    if (TI->getNumSuccessors() > (unsigned)size(C))
-      return false; // Not enough args in this CHI.
-
-    for (auto CHI : C) {
-      BasicBlock *Dest = CHI.Dest;
-      // Find if all the edges have values flowing out of BB.
-      bool Found = llvm::any_of(
-          successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; });
-      if (!Found)
-        return false;
-    }
-    return true;
-  }
-
+// Returns true when the instruction does not result in a hazard with any
+// other instruction till the end of BB.
+bool GVNHoist::safeToSinkToEnd(Instruction *I, BasicBlock *BB,
+                               GVNHoist::InsKind K) {
+  BasicBlock::iterator II(I);
+  auto IMA = MSSA->getMemoryAccess(I);
+  assert(IMA);
+  if (MemoryUseOrDef *I0UD = cast<MemoryUseOrDef>(IMA)) {
+    // Only sink loads for now because sinking a load either decreases or
+    // preserves the live range.
+    if (!isa<MemoryUse>(I0UD))
+      return false;
+
+    // MemoryAccess *Def = I0UD->getDefiningAccess();
+    /* Updating a MemoryPhi is tricky, bail out for now.
+    for (User *U : Def->users())
+      if (isa<MemoryPhi>(U))
+        return false; */
+  }
+  while (++II != BB->end()) { // Skip I.
+    auto IIMA = MSSA->getMemoryAccess(&*II);
+    // TODO: Bails out on any memory write for now; improve for non-aliases.
+    if (IIMA) {
+      if (!isa<MemoryUse>(IIMA))
+        return false;
+    }
+  }
+  return true;
+}
+
+void GVNHoist::findSinkableInsn(VNtoInsns &Map,
+                                GVNHoist::HoistingPointList &HPL,
+                                GVNHoist::InsKind K) {
+  // Sort the VNs by rank, higher ranked should be sunk first.
+  // SmallVector<VNType, 4> SortedRank;
+  // sortByRank(SortedRank, Map);
+  SmallVector<VNType, 4> ToErase;
+  for (const auto &Entry : Map) {
+    const SmallVecInsn &V = Entry.second;
+    if (V.size() < 2) {
+      ToErase.push_back(Entry.first);
+      continue;
+    }
+
+    // Only sink to a common post-dominator.
+    SmallVecInsn InstructionsToSink;
+    for (Instruction *I : V) {
+      auto BB = I->getParent();
+      // All the sinkable instructions are collected after any barrier in
+      // a basic block so no need to check for barrier BBs.
+
+      // The instruction should have only one user, i.e., a PHI in the
+      // successor. TODO: Handle stores, calls etc. without users.
+      unsigned NumUsers = std::distance(I->user_begin(), I->user_end());
+      if (NumUsers != 1)
+        continue;
+      // The only user should be a PHI.
+      if (!isa<PHINode>(I->user_back()))
+        continue;
+
+      // Sink to a single successor.
+      if (auto Succ = BB->getSingleSuccessor()) {
+        if (I->user_back()->getParent() != Succ)
+          continue; // The PHI should be in the immediate successor.
+        // The successor should have exactly two predecessors.
+        unsigned NumPreds = std::distance(pred_begin(Succ), pred_end(Succ));
+        if (NumPreds != 2)
+          continue;
+
+        // Sinkable instruction found; check for safety and profitability.
+        // As the instructions are from a basic block without barriers, we
+        // can sink this instruction as long as there is no hazard. Scalars
+        // are safe to sink.
+        if (K != InsKind::Scalar && !safeToSinkToEnd(I, BB, K))
+          continue;
+        if (!profitableToSink(I))
+          continue;
+
+        // We don't need to check for barriers here because the instruction
+        // will be sunk at the beginning of a basic block, i.e., before any
+        // barrier, and I is after any barriers in I->getParent().
+        if (!hasEH(Succ))
+          InstructionsToSink.push_back(I);
+      }
+    }
+
+    // Sort V such that adjacent instructions share a common PDom.
+    if (InstructionsToSink.size() != 2)
+      continue;
+    // There should be a unique PHI using I0 and I1 to be legally sinkable.
+    if (InstructionsToSink[0]->user_back() ==
+        InstructionsToSink[1]->user_back()) {
+      auto Succ0 = InstructionsToSink[0]->getParent()->getSingleSuccessor();
+      auto Succ1 = InstructionsToSink[1]->getParent()->getSingleSuccessor();
+      assert(Succ0 == Succ1);
+      HPL.push_back({Succ0, InstructionsToSink});
+    }
+    ToErase.push_back(Entry.first);
+  }
+
+  // Erase VNs already handled.
+  for (auto V : ToErase)
+    Map.erase(V);
+}
-  // Check if it is safe to hoist values tracked by CHI in the range
-  // [Begin, End) and accumulate them in Safe.
-  void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
-                   SmallVectorImpl<CHIArg> &Safe) {
-    int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
-    for (auto CHI : C) {
-      Instruction *Insn = CHI.I;
-      if (!Insn) // No instruction was inserted in this CHI.
-        continue;
-      if (K == InsKind::Scalar) {
-        if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
-          Safe.push_back(CHI);
-      } else {
-        MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn);
-        if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths))
-          Safe.push_back(CHI);
-      }
-    }
-  }
-
-  using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>;
-
-  // Push all the VNs corresponding to BB into RenameStack.
-  void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
-                       RenameStackType &RenameStack) {
-    auto it1 = ValueBBs.find(BB);
-    if (it1 != ValueBBs.end()) {
-      // Iterate in reverse order to keep lower ranked values on the top.
-      for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
-        // Get the value of instruction I
-        LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
-        RenameStack[VI.first].push_back(VI.second);
-      }
-    }
-  }
-
-  void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
-                   RenameStackType &RenameStack) {
-    // For each *predecessor* (because Post-DOM) of BB check if it has a CHI
-    for (auto Pred : predecessors(BB)) {
-      auto P = CHIBBs.find(Pred);
-      if (P == CHIBBs.end()) {
-        continue;
-      }
-      LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
-      // A CHI is found (BB -> Pred is an edge in the CFG)
-      // Pop the stack until Top(V) = Ve.
-      auto &VCHI = P->second;
-      for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
-        CHIArg &C = *It;
-        if (!C.Dest) {
-          auto si = RenameStack.find(C.VN);
-          // The Basic Block where CHI is must dominate the value we want to
-          // track in a CHI. In the PDom walk, there can be values in the
-          // stack which are not control dependent e.g., nested loop.
-          if (si != RenameStack.end() && si->second.size() &&
-              DT->properlyDominates(Pred, si->second.back()->getParent())) {
-            C.Dest = BB;                     // Assign the edge
-            C.I = si->second.pop_back_val(); // Assign the argument
-            LLVM_DEBUG(dbgs()
-                       << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
-                       << ", VN: " << C.VN.first << ", " << C.VN.second);
-          }
-          // Move to next CHI of a different value
-          It = std::find_if(It, VCHI.end(),
-                            [It](CHIArg &A) { return A != *It; });
-        } else
-          ++It;
-      }
-    }
-  }
-
-  // Walk the post-dominator tree top-down and use a stack for each value to
-  // store the last value you see. When you hit a CHI from a given edge, the
-  // value to use as the argument is at the top of the stack, add the value to
-  // CHI and pop.
-  void insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs) {
-    auto Root = PDT->getNode(nullptr);
-    if (!Root)
-      return;
-    // Depth first walk on PDom tree to fill the CHIargs at each PDF.
-    RenameStackType RenameStack;
-    for (auto Node : depth_first(Root)) {
-      BasicBlock *BB = Node->getBlock();
-      if (!BB)
-        continue;
-
-      // Collect all values in BB and push to stack.
-      fillRenameStack(BB, ValueBBs, RenameStack);
-
-      // Fill outgoing values in each CHI corresponding to BB.
-      fillChiArgs(BB, CHIBBs, RenameStack);
-    }
-  }
-
+// In the inverse CFG, the dominance frontier of basic block (BB) is the
+// point where ANTIC needs to be computed for instructions which are going
+// to be hoisted. Since this point does not change during gvn-hoist,
+// we compute it only once (on demand).
+// The idea is inspired from:
+// "Partial Redundancy Elimination in SSA Form"
+// ROBERT KENNEDY, SUN CHAN, SHIN-MING LIU, RAYMOND LO, PENG TU and FRED CHOW
+// They use a similar idea in the forward graph to find fully redundant and
+// partially redundant expressions; here it is used in the inverse graph to
+// find fully anticipable instructions at merge points (post-dominators in
+// the inverse CFG).
+// Returns the edge via which an instruction in BB will get the values from.
+
+// Returns true when the values are flowing out to each edge.
+bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const {
+  if (TI->getNumSuccessors() > (unsigned)size(C))
+    return false; // Not enough args in this CHI.
+
+  for (const CHIArg &CHI : C) {
+    const BasicBlock *Dest = CHI.Dest;
+    // Find if all the edges have values flowing out of BB.
+    // This is clearly quadratic; because it happens only for a limited
+    // number of CHIs it is okay for now.
+    // TODO: To make it more efficient, collect all successors(TI) and all
+    // CHI.Dest in separate sorted containers, and look for a mismatch.
+    bool Found = any_of(successors(TI),
+                        [Dest](const BasicBlock *BB) { return BB == Dest; });
+    if (!Found)
+      return false;
+  }
+  return true;
+}
-  // Walk all the CHI-nodes to find ones which have a empty-entry and remove
-  // them Then collect all the instructions which are safe to hoist and see if
-  // they form a list of anticipable values. OutValues contains CHIs
-  // corresponding to each basic block.
-  void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
-                               HoistingPointList &HPL) {
-    auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
-
-    // CHIArgs now have the outgoing values, so check for anticipability and
-    // accumulate hoistable candidates in HPL.
-    for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
-      BasicBlock *BB = A.first;
-      SmallVectorImpl<CHIArg> &CHIs = A.second;
-      // Vector of PHIs contains PHIs for different instructions.
-      // Sort the args according to their VNs, such that identical
-      // instructions are together.
-      llvm::stable_sort(CHIs, cmpVN);
-      auto TI = BB->getTerminator();
-      auto B = CHIs.begin();
-      // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
-      auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(),
-                                [B](CHIArg &A) { return A != *B; });
-      auto PrevIt = CHIs.begin();
-      while (PrevIt != PHIIt) {
-        // Collect values which satisfy safety checks.
-        SmallVector<CHIArg, 2> Safe;
-        // We check for safety first because there might be multiple values in
-        // the same path, some of which are not safe to be hoisted, but overall
-        // each edge has at least one value which can be hoisted, making the
-        // value anticipable along that path.
-        checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
-
-        // List of safe values should be anticipable at TI.
-        if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
-          HPL.push_back({BB, SmallVecInsn()});
-          SmallVecInsn &V = HPL.back().second;
-          for (auto B : Safe)
-            V.push_back(B.I);
-        }
-
-        // Check other VNs
-        PrevIt = PHIIt;
-        PHIIt = std::find_if(PrevIt, CHIs.end(),
-                             [PrevIt](CHIArg &A) { return A != *PrevIt; });
-      }
-    }
-  }
-
+// Check if it is safe to hoist values tracked by CHI in the range
+// [Begin, End) and accumulate them in Safe.
+void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
+                           SmallVectorImpl<CHIArg> &Safe) {
+  int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
+  for (auto CHI : C) {
+    Instruction *Insn = CHI.I;
+    if (!Insn) // No instruction was inserted in this CHI.
+      continue;
+    if (K == InsKind::Scalar) {
+      if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
+        Safe.push_back(CHI);
+    } else {
+      MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn);
+      // TODO: Use safeToHoistLdSt_0.
+      if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths))
+        Safe.push_back(CHI);
+    }
+  }
+}
+
+// Push all the VNs corresponding to BB into RenameStack.
+void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
+                               RenameStackType &RenameStack) {
+  auto it1 = ValueBBs.find(BB);
+  if (it1 != ValueBBs.end()) {
+    // Iterate in reverse order to keep lower ranked values on the top.
+    for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
+      // Get the value of instruction I.
+      LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
+      RenameStack[VI.first].push_back(VI.second);
+    }
+  }
+}
+
+void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
+                           RenameStackType &RenameStack) {
+  // For each *predecessor* (because Post-DOM) of BB check if it has a CHI.
+  for (auto Pred : predecessors(BB)) {
+    auto P = CHIBBs.find(Pred);
+    if (P == CHIBBs.end()) {
+      continue;
+    }
+    LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
+    // A CHI is found (BB -> Pred is an edge in the CFG).
+    // Pop the stack until Top(V) = Ve.
+    auto &VCHI = P->second;
+    for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
+      CHIArg &C = *It;
+      if (!C.Dest) {
+        auto si = RenameStack.find(C.VN);
+        // The Basic Block where CHI is must dominate the value we want to
+        // track in a CHI. In the PDom walk, there can be values in the
+        // stack which are not control dependent, e.g., in a nested loop.
+        if (si != RenameStack.end() && si->second.size() &&
+            DT->properlyDominates(Pred, si->second.back()->getParent())) {
+          C.Dest = BB;                     // Assign the edge.
+          C.I = si->second.pop_back_val(); // Assign the argument.
+          LLVM_DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName()
+                            << *C.I << ", VN: " << C.VN.first << ", "
+                            << C.VN.second);
+        }
+        // Move to next CHI of a different value.
+        It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; });
+      } else
+        ++It;
+    }
+  }
+}
+
+// Walk the post-dominator tree top-down and use a stack for each value to
+// store the last value you see. When you hit a CHI from a given edge, the
+// value to use as the argument is at the top of the stack; add the value to
+// the CHI and pop.
+void GVNHoist::insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs) {
+  auto Root = PDT->getNode(nullptr);
+  if (!Root)
+    return;
+  // Depth first walk on PDom tree to fill the CHIargs at each PDF.
+  RenameStackType RenameStack;
+  for (auto Node : depth_first(Root)) {
+    BasicBlock *BB = Node->getBlock();
+    if (!BB)
+      continue;
+
+    // Collect all values in BB and push to stack.
+    fillRenameStack(BB, ValueBBs, RenameStack);
+
+    // Fill outgoing values in each CHI corresponding to BB.
+    fillChiArgs(BB, CHIBBs, RenameStack);
+  }
+}
+
+// Walk all the CHI-nodes to find ones which have an empty-entry and remove
+// them. Then collect all the instructions which are safe to hoist and see if
+// they form a list of anticipable values. OutValues contains CHIs
+// corresponding to each basic block.
+void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
+                                       HoistingPointList &HPL) {
+  auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
+
+  // CHIArgs now have the outgoing values, so check for anticipability and
+  // accumulate hoistable candidates in HPL.
+  for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
+    BasicBlock *BB = A.first;
+    SmallVectorImpl<CHIArg> &CHIs = A.second;
+    // Vector of CHIs contains CHIs for different instructions.
+    // Sort the args according to their VNs, such that identical
+    // instructions are together.
+    llvm::stable_sort(CHIs, cmpVN);
+    auto TI = BB->getTerminator();
+    auto B = CHIs.begin();
+    // [PrevIt, PHIIt) form a range of CHIs which have identical VNs.
+    auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(),
+                              [B](CHIArg &A) { return A != *B; });
+    auto PrevIt = CHIs.begin();
+    while (PrevIt != PHIIt) {
+      // Collect values which satisfy safety checks.
+      SmallVector<CHIArg, 2> Safe;
+      // We check for safety first because there might be multiple values in
+      // the same path, some of which are not safe to be hoisted, but overall
+      // each edge has at least one value which can be hoisted, making the
+      // value anticipable along that path.
+      checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
+
+      // List of safe values should be anticipable at TI.
+      auto SR = make_range(Safe.begin(), Safe.end());
+      if (valueAnticipable(SR, TI) && profitableToHoist(SR)) {
+        HPL.push_back({BB, SmallVecInsn()});
+        SmallVecInsn &V = HPL.back().second;
+        for (auto B : Safe)
+          V.push_back(B.I);
+      }
+
+      // Check other VNs.
+      PrevIt = PHIIt;
+      PHIIt = std::find_if(PrevIt, CHIs.end(),
+                           [PrevIt](CHIArg &A) { return A != *PrevIt; });
+    }
+  }
+}
+
-  // Compute insertion points for each values which can be fully anticipated at
-  // a dominator. HPL contains all such values.
-  void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
-                              InsKind K) {
-    // Sort VNs based on their rankings
-    std::vector<VNType> Ranks;
-    for (const auto &Entry : Map) {
-      Ranks.push_back(Entry.first);
-    }
-
-    // TODO: Remove fully-redundant expressions.
-    // Get instruction from the Map, assume that all the Instructions
-    // with same VNs have same rank (this is an approximation).
-    llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) {
-      return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin()));
-    });
-
-    // - Sort VNs according to their rank, and start with lowest ranked VN
-    // - Take a VN and for each instruction with same VN
-    //   - Find the dominance frontier in the inverse graph (PDF)
-    //   - Insert the chi-node at PDF
-    //   - Remove the chi-nodes with missing entries
-    //   - Remove values from CHI-nodes which do not truly flow out, e.g.,
-    //     modified along the path.
-    //   - Collect the remaining values that are still anticipable
-    SmallVector<BasicBlock *, 2> IDFBlocks;
-    ReverseIDFCalculator IDFs(*PDT);
-    OutValuesType OutValue;
-    InValuesType InValue;
-    for (const auto &R : Ranks) {
-      const SmallVecInsn &V = Map.lookup(R);
-      if (V.size() < 2)
-        continue;
-      const VNType &VN = R;
-      SmallPtrSet<BasicBlock *, 2> VNBlocks;
-      for (auto &I : V) {
-        BasicBlock *BBI = I->getParent();
-        if (!hasEH(BBI))
-          VNBlocks.insert(BBI);
-      }
-      // Compute the Post Dominance Frontiers of each basic block
-      // The dominance frontier of a live block X in the reverse
-      // control graph is the set of blocks upon which X is control
-      // dependent. The following sequence computes the set of blocks
-      // which currently have dead terminators that are control
-      // dependence sources of a block which is in NewLiveBlocks.
-      IDFs.setDefiningBlocks(VNBlocks);
-      IDFBlocks.clear();
-      IDFs.calculate(IDFBlocks);
-
-      // Make a map of BB vs instructions to be hoisted.
-      for (unsigned i = 0; i < V.size(); ++i) {
-        InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i]));
-      }
-      // Insert empty CHI node for this VN. This is used to factor out
-      // basic blocks where the ANTIC can potentially change.
-      for (auto IDFB : IDFBlocks) {
-        for (unsigned i = 0; i < V.size(); ++i) {
-          CHIArg C = {VN, nullptr, nullptr};
-          // Ignore spurious PDFs.
-          if (DT->properlyDominates(IDFB, V[i]->getParent())) {
-            OutValue[IDFB].push_back(C);
-            LLVM_DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName()
-                              << ", for Insn: " << *V[i]);
-          }
-        }
-      }
-    }
-
-    // Insert CHI args at each PDF to iterate on factored graph of
-    // control dependence.
-    insertCHI(InValue, OutValue);
-    // Using the CHI args inserted at each PDF, find fully anticipable values.
-    findHoistableCandidates(OutValue, K, HPL);
-  }
-
+// Compute insertion points for each value which can be fully anticipated at
+// a dominator. HPL contains all such values.
+void GVNHoist::computeInsertionPoints(VNtoInsns &Map, HoistingPointList &HPL,
+                                      InsKind K) {
+  // Sort VNs based on their rankings.
+  std::vector<VNType> Ranks;
+  for (const auto &Entry : Map) {
+    Ranks.push_back(Entry.first);
+  }
+
+  // TODO: Remove fully-redundant expressions.
+  // Get instruction from the Map, assume that all the Instructions
+  // with same VNs have same rank (this is an approximation).
+  llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) {
+    return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin()));
+  });
+
+  // - Sort VNs according to their rank, and start with lowest ranked VN
+  // - Take a VN and for each instruction with same VN
+  //   - Find the dominance frontier in the inverse graph (PDF)
+  //   - Insert the chi-node at PDF
+  //   - Remove the chi-nodes with missing entries
+  //   - Remove values from CHI-nodes which do not truly flow out, e.g.,
+  //     modified along the path.
+  //   - Collect the remaining values that are still anticipable
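+  //
+  // Illustrative example: for a diamond A -> {B, C} -> D with the same VN
+  // computed in B and in C, the post-dominance frontier of {B, C} is {A},
+  // so an empty CHI for that VN is placed at A; the PDom walk then fills
+  // its two arguments from B and C, the value becomes fully anticipable
+  // at A, and A is chosen as the hoisting point.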
+  // - Collect the remaining values that are still anticipable
+  SmallVector<BasicBlock *, 2> IDFBlocks;
+  ReverseIDFCalculator IDFs(*PDT);
+  OutValuesType OutValue;
+  InValuesType InValue;
+  SmallVector<VNType, 2> ToErase;
+  for (const auto &R : Ranks) {
+    const SmallVecInsn &V = Map.lookup(R);
+    ToErase.push_back(R);
+    if (V.size() < 2)
+      continue;
+    const VNType &VN = R;
+    SmallPtrSet<BasicBlock *, 2> VNBlocks;
+    for (auto &I : V) {
+      BasicBlock *BBI = I->getParent();
+      if (!hasEH(BBI))
+        VNBlocks.insert(BBI);
+    }
+    // Compute the Post Dominance Frontiers of each basic block
+    // The dominance frontier of a live block X in the reverse
+    // control graph is the set of blocks upon which X is control
+    // dependent. The following sequence computes the set of blocks
+    // which currently have dead terminators that are control
+    // dependence sources of a block which is in NewLiveBlocks.
+    IDFs.setDefiningBlocks(VNBlocks);
+    IDFBlocks.clear();
+    IDFs.calculate(IDFBlocks);
+
+    // Make a map of BB vs instructions to be hoisted.
+    for (unsigned i = 0; i < V.size(); ++i) {
+      InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i]));
+    }
+    // Insert empty CHI node for this VN. This is used to factor out
+    // basic blocks where the ANTIC can potentially change.
+    for (auto IDFB : IDFBlocks) {
       for (unsigned i = 0; i < V.size(); ++i) {
-        InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i]));
-      }
-      // Insert empty CHI node for this VN. This is used to factor out
-      // basic blocks where the ANTIC can potentially change.
-      for (auto IDFB : IDFBlocks) {
-        for (unsigned i = 0; i < V.size(); ++i) {
-          CHIArg C = {VN, nullptr, nullptr};
-          // Ignore spurious PDFs.
-          if (DT->properlyDominates(IDFB, V[i]->getParent())) {
-            OutValue[IDFB].push_back(C);
-            LLVM_DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName()
-                              << ", for Insn: " << *V[i]);
-          }
+        CHIArg C = {VN, nullptr, nullptr};
+        // Ignore spurious PDFs.
+        if (DT->properlyDominates(IDFB, V[i]->getParent())) {
+          OutValue[IDFB].push_back(C);
+          LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: " << IDFB->getName()
+                            << ", for Insn: " << *V[i]);
        }
      }
    }
  }
-
-    // Insert CHI args at each PDF to iterate on factored graph of
-    // control dependence.
-    insertCHI(InValue, OutValue);
-    // Using the CHI args inserted at each PDF, find fully anticipable values.
-    findHoistableCandidates(OutValue, K, HPL);
-  }
-  // Return true when all operands of Instr are available at insertion point
-  // HoistPt. When limiting the number of hoisted expressions, one could hoist
-  // a load without hoisting its access function. So before hoisting any
-  // expression, make sure that all its operands are available at insert point.
-  bool allOperandsAvailable(const Instruction *I,
-                            const BasicBlock *HoistPt) const {
-    for (const Use &Op : I->operands())
-      if (const auto *Inst = dyn_cast<Instruction>(&Op))
-        if (!DT->dominates(Inst->getParent(), HoistPt))
-          return false;
+  // Erase VNs already handled.
+  for (auto V : ToErase)
+    Map.erase(V);
-    return true;
-  }
+  // Insert CHI args at each PDF to iterate on factored graph of
+  // control dependence.
+  insertCHI(InValue, OutValue);
+  // Using the CHI args inserted at each PDF, find fully anticipable values.
+  findHoistableCandidates(OutValue, K, HPL);
+}
-  // Same as allOperandsAvailable with recursive check for GEP operands.
-  bool allGepOperandsAvailable(const Instruction *I,
-                               const BasicBlock *HoistPt) const {
-    for (const Use &Op : I->operands())
-      if (const auto *Inst = dyn_cast<Instruction>(&Op))
-        if (!DT->dominates(Inst->getParent(), HoistPt)) {
-          if (const GetElementPtrInst *GepOp =
-                  dyn_cast<GetElementPtrInst>(Inst)) {
-            if (!allGepOperandsAvailable(GepOp, HoistPt))
-              return false;
-            // Gep is available if all operands of GepOp are available.
-          } else {
-            // Gep is not available if it has operands other than GEPs that are
-            // defined in blocks not dominating HoistPt.
-            return false;
-          }
-        }
-    return true;
-  }
+bool GVNHoist::allOperandsAvailable(Instruction *I, BasicBlock *NewDest,
+                                    SmallVecImplVal &Unav) const {
+  for (Use &Op : I->operands())
+    if (auto *Inst = dyn_cast<Instruction>(&Op))
+      if (!DT->dominates(Inst->getParent(), NewDest))
+        Unav.push_back(Op.get());
-  // Make all operands of the GEP available.
-  void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
-                         const SmallVecInsn &InstructionsToHoist,
-                         Instruction *Gep) const {
-    assert(allGepOperandsAvailable(Gep, HoistPt) &&
-           "GEP operands not available");
-
-    Instruction *ClonedGep = Gep->clone();
-    for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
-      if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
-        // Check whether the operand is already available.
-        if (DT->dominates(Op->getParent(), HoistPt))
-          continue;
+  return Unav.empty();
+}
-        // As a GEP can refer to other GEPs, recursively make all the operands
-        // of this GEP available at HoistPt.
-        if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
-          makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+// Same as allOperandsAvailable with recursive check for GEP operands.
+bool GVNHoist::allGepOperandsAvailable(const Instruction *I,
+                                       const BasicBlock *HoistPt) const {
+  for (const Use &Op : I->operands())
+    if (const auto *Inst = dyn_cast<Instruction>(&Op))
+      if (!DT->dominates(Inst->getParent(), HoistPt)) {
+        if (const GetElementPtrInst *GepOp =
+                dyn_cast<GetElementPtrInst>(Inst)) {
+          if (!allGepOperandsAvailable(GepOp, HoistPt))
+            return false;
+          // Gep is available if all operands of GepOp are available.
+        } else {
+          // Gep is not available if it has operands other than GEPs that are
+          // defined in blocks not dominating HoistPt.
+          return false;
+        }
+      }
+  return true;
+}
+
+// Make all operands of the GEP available.
+void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                                 const SmallVecInsn &InstructionsToHoist,
+                                 Instruction *Gep) const {
+  assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
+
+  Instruction *ClonedGep = Gep->clone();
+  for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+    if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+      // Check whether the operand is already available.
+      if (DT->dominates(Op->getParent(), HoistPt))
+        continue;
-    // Copy Gep and replace its uses in Repl with ClonedGep.
-    ClonedGep->insertBefore(HoistPt->getTerminator());
-
-    // Conservatively discard any optimization hints, they may differ on the
-    // other paths.
-    ClonedGep->dropUnknownNonDebugMetadata();
-
-    // If we have optimization hints which agree with each other along different
-    // paths, preserve them.
-    for (const Instruction *OtherInst : InstructionsToHoist) {
-      const GetElementPtrInst *OtherGep;
-      if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
-        OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
-      else
-        OtherGep = cast<GetElementPtrInst>(
-            cast<StoreInst>(OtherInst)->getPointerOperand());
-      ClonedGep->andIRFlags(OtherGep);
+      // As a GEP can refer to other GEPs, recursively make all the operands
+      // of this GEP available at HoistPt.
+      if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+        makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+    }
-    // Replace uses of Gep with ClonedGep in Repl.
-    Repl->replaceUsesOfWith(Gep, ClonedGep);
-  }
+  // Copy Gep and replace its uses in Repl with ClonedGep.
+  ClonedGep->insertBefore(HoistPt->getTerminator());
-  void updateAlignment(Instruction *I, Instruction *Repl) {
-    if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
-      ReplacementLoad->setAlignment(MaybeAlign(std::min(
-          ReplacementLoad->getAlignment(), cast<LoadInst>(I)->getAlignment())));
-      ++NumLoadsRemoved;
-    } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
-      ReplacementStore->setAlignment(
-          MaybeAlign(std::min(ReplacementStore->getAlignment(),
-                              cast<StoreInst>(I)->getAlignment())));
-      ++NumStoresRemoved;
-    } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
-      ReplacementAlloca->setAlignment(
-          MaybeAlign(std::max(ReplacementAlloca->getAlignment(),
-                              cast<AllocaInst>(I)->getAlignment())));
-    } else if (isa<CallInst>(Repl)) {
-      ++NumCallsRemoved;
-    }
-  }
+  // Conservatively discard any optimization hints, they may differ on the
+  // other paths.
+  ClonedGep->dropUnknownNonDebugMetadata();
+
+  // If we have optimization hints which agree with each other along different
+  // paths, preserve them.
+  for (const Instruction *OtherInst : InstructionsToHoist) {
+    const GetElementPtrInst *OtherGep;
+    if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+      OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+    else
+      OtherGep = cast<GetElementPtrInst>(
+          cast<StoreInst>(OtherInst)->getPointerOperand());
+    ClonedGep->andIRFlags(OtherGep);
+  }
-  // Remove all the instructions in Candidates and replace their usage with Repl.
-  // Returns the number of instructions removed.
-  unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
-                MemoryUseOrDef *NewMemAcc) {
-    unsigned NR = 0;
-    for (Instruction *I : Candidates) {
-      if (I != Repl) {
-        ++NR;
-        updateAlignment(I, Repl);
-        if (NewMemAcc) {
-          // Update the uses of the old MSSA access with NewMemAcc.
-          MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
-          OldMA->replaceAllUsesWith(NewMemAcc);
-          MSSAUpdater->removeMemoryAccess(OldMA);
-        }
+  // Replace uses of Gep with ClonedGep in Repl.
+  Repl->replaceUsesOfWith(Gep, ClonedGep);
+}
-        Repl->andIRFlags(I);
-        combineKnownMetadata(Repl, I);
-        I->replaceAllUsesWith(Repl);
-        // Also invalidate the Alias Analysis cache.
-        MD->removeInstruction(I);
-        I->eraseFromParent();
+void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) {
+  if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
+    ReplacementLoad->setAlignment(MaybeAlign(std::min(
+        ReplacementLoad->getAlignment(), cast<LoadInst>(I)->getAlignment())));
+    ++NumLoadsRemoved;
+  } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
+    ReplacementStore->setAlignment(MaybeAlign(std::min(
+        ReplacementStore->getAlignment(), cast<StoreInst>(I)->getAlignment())));
+    ++NumStoresRemoved;
+  } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
+    ReplacementAlloca->setAlignment(
+        MaybeAlign(std::max(ReplacementAlloca->getAlignment(),
+                            cast<AllocaInst>(I)->getAlignment())));
+  } else if (isa<CallInst>(Repl)) {
+    ++NumCallsRemoved;
+  }
+}
+
+// Remove all the instructions in Candidates and replace their usage with Repl.
+// Returns the number of instructions removed.
+unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl,
+                        MemoryUseOrDef *NewMemAcc) {
+  unsigned NR = 0;
+  for (Instruction *I : Candidates) {
+    if (I != Repl) {
+      ++NR;
+      updateAlignment(I, Repl);
+      if (NewMemAcc) {
+        // Update the uses of the old MSSA access with NewMemAcc.
+        MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
+        OldMA->replaceAllUsesWith(NewMemAcc);
+        MSSAUpdater->removeMemoryAccess(OldMA);
+      }
+
+      Repl->andIRFlags(I);
+      combineKnownMetadata(Repl, I);
+      I->replaceAllUsesWith(Repl);
+      // Also invalidate the Alias Analysis cache.
+      MD->removeInstruction(I);
+      I->eraseFromParent();
+    }
-    return NR;
-  }
+  }
+  return NR;
+}
-  // Replace all Memory PHI usage with NewMemAcc.
-  void raMPHIuw(MemoryUseOrDef *NewMemAcc) {
-    SmallPtrSet<MemoryPhi *, 4> UsePhis;
-    for (User *U : NewMemAcc->users())
-      if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
-        UsePhis.insert(Phi);
-
-    for (MemoryPhi *Phi : UsePhis) {
-      auto In = Phi->incoming_values();
-      if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
-        Phi->replaceAllUsesWith(NewMemAcc);
-        MSSAUpdater->removeMemoryAccess(Phi);
-      }
+// Replace all Memory PHI usage with NewMemAcc.
+void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) {
+  SmallPtrSet<MemoryPhi *, 4> UsePhis;
+  for (User *U : NewMemAcc->users())
+    if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
+      UsePhis.insert(Phi);
+
+  for (MemoryPhi *Phi : UsePhis) {
+    auto In = Phi->incoming_values();
+    if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
+      Phi->replaceAllUsesWith(NewMemAcc);
+      MSSAUpdater->removeMemoryAccess(Phi);
+    }
+  }
+}
-  // Remove all other instructions and replace them with Repl.
-  unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
-                            BasicBlock *DestBB, bool MoveAccess) {
-    MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
-    if (MoveAccess && NewMemAcc) {
-      // The definition of this ld/st will not change: ld/st hoisting is
-      // legal when the ld/st is not moved past its current definition.
-      MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::End);
+void GVNHoist::removeAndReplace(Instruction *I, Instruction *Repl,
+                                MemoryAccess *NewMemAcc) {
+  updateAlignment(I, Repl);
+  if (NewMemAcc) {
+    // Update the uses of the old MSSA access with NewMemAcc.
+    MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
+    OldMA->replaceAllUsesWith(NewMemAcc);
+    MSSAUpdater->removeMemoryAccess(OldMA);
+  }
+
+  Repl->andIRFlags(I);
+  combineKnownMetadata(Repl, I);
+  I->replaceAllUsesWith(Repl);
+  // Also invalidate the Alias Analysis cache.
+  MD->removeInstruction(I);
+  I->eraseFromParent();
+}
+
+void GVNHoist::removeAndReplace(const SmallVecInsn &InstructionsToHoist,
+                                Instruction *Repl) {
+  MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl);
+  // Remove and rename all other instructions.
+  for (Instruction *I : InstructionsToHoist)
+    if (I != Repl) {
+      removeAndReplace(I, Repl, NewMemAcc);
+    }
+}
-    // Replace all other instructions with Repl with memory access NewMemAcc.
-    unsigned NR = rauw(Candidates, Repl, NewMemAcc);
+void GVNHoist::removeMPhi(MemoryAccess *NewMemAcc) {
+  // Remove MemorySSA phi nodes with the same arguments.
+  SmallPtrSet<MemoryPhi *, 4> UsePhis;
+  for (User *U : NewMemAcc->users())
+    if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
+      UsePhis.insert(Phi);
+
+  for (auto *Phi : UsePhis) {
+    auto In = Phi->incoming_values();
+    if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
+      Phi->replaceAllUsesWith(NewMemAcc);
+      MSSAUpdater->removeMemoryAccess(Phi);
+    }
+  }
+}
-    // Remove MemorySSA phi nodes with the same arguments.
-    if (NewMemAcc)
-      raMPHIuw(NewMemAcc);
-    return NR;
+// Remove all other instructions and replace them with Repl.
+unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates,
+                                    Instruction *Repl, BasicBlock *DestBB,
+                                    bool MoveAccess) {
+  MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
+  if (MoveAccess && NewMemAcc) {
+    // The definition of this ld/st will not change: ld/st hoisting is
+    // legal when the ld/st is not moved past its current definition.
+    MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::End);
+  }
-  // In the case Repl is a load or a store, we make all their GEPs
-  // available: GEPs are not hoisted by default to avoid the address
-  // computations to be hoisted without the associated load or store.
-  bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
-                                const SmallVecInsn &InstructionsToHoist) const {
-    // Check whether the GEP of a ld/st can be synthesized at HoistPt.
-    GetElementPtrInst *Gep = nullptr;
-    Instruction *Val = nullptr;
-    if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
-      Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
-    } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
-      Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
-      Val = dyn_cast<Instruction>(St->getValueOperand());
-      // Check that the stored value is available.
-      if (Val) {
-        if (isa<GetElementPtrInst>(Val)) {
-          // Check whether we can compute the GEP at HoistPt.
-          if (!allGepOperandsAvailable(Val, HoistPt))
-            return false;
-        } else if (!DT->dominates(Val->getParent(), HoistPt))
-          return false;
-      }
-    }
+  // Replace all other instructions with Repl with memory access NewMemAcc.
+  unsigned NR = rauw(Candidates, Repl, NewMemAcc);
+
+  // Remove MemorySSA phi nodes with the same arguments.
+  if (NewMemAcc)
+    raMPHIuw(NewMemAcc);
+  return NR;
+}
+
+// In the case Repl is a load or a store, we make all their GEPs available:
+// GEPs are not hoisted by default to avoid hoisting the address computations
+// without the associated load or store.
+bool GVNHoist::makeGepOperandsAvailable(
+    Instruction *Repl, BasicBlock *HoistPt,
+    const SmallVecInsn &InstructionsToHoist) const {
+  // Check whether the GEP of a ld/st can be synthesized at HoistPt.
+  GetElementPtrInst *Gep = nullptr;
+  Instruction *Val = nullptr;
+  if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
+    Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
+  } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
+    Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
+    Val = dyn_cast<Instruction>(St->getValueOperand());
+    // Check that the stored value is available.
+    if (Val) {
+      if (isa<GetElementPtrInst>(Val)) {
+        // Check whether we can compute the GEP at HoistPt.
+        if (!allGepOperandsAvailable(Val, HoistPt))
+          return false;
+      } else if (!DT->dominates(Val->getParent(), HoistPt))
+        return false;
+    }
+  }
-    // Check whether we can compute the Gep at HoistPt.
-    if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
-      return false;
+  // Check whether we can compute the Gep at HoistPt.
+  if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
+    return false;
-    makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
+  makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
-    if (Val && isa<GetElementPtrInst>(Val))
-      makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
+  if (Val && isa<GetElementPtrInst>(Val))
+    makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
-    return true;
+  return true;
+}
+
+std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
+  unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
+  for (const HoistingPointInfo &HP : HPL) {
+    // Find out whether we already have one of the instructions in HoistPt,
+    // in which case we do not have to move it.
+    BasicBlock *DestBB = HP.first;
+    const SmallVecInsn &InstructionsToHoist = HP.second;
+    Instruction *Repl = nullptr;
+    for (Instruction *I : InstructionsToHoist)
+      if (I->getParent() == DestBB)
+        // If there are two instructions in HoistPt to be hoisted in place:
+        // update Repl to be the first one, such that we can rename the uses
+        // of the second based on the first.
+        if (!Repl || firstInBB(I, Repl))
+          Repl = I;
+
+    // Keep track of whether we moved the instruction so we know whether we
+    // should move the MemoryAccess.
+    bool MoveAccess = true;
+    if (Repl) {
+      // Repl is already in HoistPt: it remains in place.
+      SmallVecVal V;
+      assert(allOperandsAvailable(Repl, DestBB, V) &&
+             "instruction depends on operands that are not available");
+      MoveAccess = false;
+    } else {
+      // When we do not find Repl in HoistPt, select the first in the list
+      // and move it to HoistPt.
+      Repl = InstructionsToHoist.front();
+
+      // We can move Repl in HoistPt only when all operands are available.
+      // The order in which hoistings are done may influence the availability
+      // of operands.
+      SmallVecVal V;
+      if (!allOperandsAvailable(Repl, DestBB, V)) {
+        // When HoistingGeps there is nothing more we can do to make the
+        // operands available: just continue.
+        if (HoistingGeps)
+          continue;
+
+        // When not HoistingGeps we need to copy the GEPs.
+        if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
+          continue;
+      }
+
+      // Move the instruction to the end of HoistPt.
+      Instruction *Last = DestBB->getTerminator();
+      MD->removeInstruction(Repl);
+      Repl->moveBefore(Last);
+
+      DFSNumber[Repl] = DFSNumber[Last]++;
+    }
+
+    NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
+    updateLocalStats(Repl, NI, NL, NS, NC);
+  }
-  std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
-    unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
-    for (const HoistingPointInfo &HP : HPL) {
-      // Find out whether we already have one of the instructions in HoistPt,
-      // in which case we do not have to move it.
-      BasicBlock *DestBB = HP.first;
-      const SmallVecInsn &InstructionsToHoist = HP.second;
-      Instruction *Repl = nullptr;
-      for (Instruction *I : InstructionsToHoist)
-        if (I->getParent() == DestBB)
-          // If there are two instructions in HoistPt to be hoisted in place:
-          // update Repl to be the first one, such that we can rename the uses
-          // of the second based on the first.
-          if (!Repl || firstInBB(I, Repl))
-            Repl = I;
-
-      // Keep track of whether we moved the instruction so we know whether we
-      // should move the MemoryAccess.
-      bool MoveAccess = true;
-      if (Repl) {
-        // Repl is already in HoistPt: it remains in place.
-        assert(allOperandsAvailable(Repl, DestBB) &&
-               "instruction depends on operands that are not available");
-        MoveAccess = false;
-      } else {
-        // When we do not find Repl in HoistPt, select the first in the list
-        // and move it to HoistPt.
-        Repl = InstructionsToHoist.front();
-
-        // We can move Repl in HoistPt only when all operands are available.
-        // The order in which hoistings are done may influence the availability
-        // of operands.
-        if (!allOperandsAvailable(Repl, DestBB)) {
-          // When HoistingGeps there is nothing more we can do to make the
-          // operands available: just continue.
-          if (HoistingGeps)
-            continue;
+  updateHoistStats(NI, NL, NS, NC, NR);
+  return {NI, NL + NC + NS};
+}
-          // When not HoistingGeps we need to copy the GEPs.
-          if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
-            continue;
+// Update MemorySSA if mem-refs are sunk.
+std::pair<unsigned, unsigned> GVNHoist::sink(GVNHoist::HoistingPointList &HPL) {
+  unsigned NI = 0, NL = 0, NS = 0, NC = 0;
+
+  std::sort(HPL.begin(), HPL.end(),
+            [this](const HoistingPointInfo &HP1, const HoistingPointInfo &HP2) {
+              // Sort in descending order to have last executing insn first in
+              // HPL.
+              return !DFSInOrder(HP1.second[0], HP2.second[0]);
+            });
+
+  for (const HoistingPointInfo &HP : HPL) {
+    BasicBlock *SinkPt = HP.first;
+    const SmallVecInsn &InstructionsToHoist = HP.second;
+    assert(InstructionsToHoist.size() == 2);
+    Instruction *I0 = InstructionsToHoist[0];
+    Instruction *I1 = InstructionsToHoist[1];
+
+    // We need to check availability for I0 and I1 at SinkPt.
+    // TODO: Additionally, we can still sink if the operand not available is
+    // also getting sunk with available operands.
+    SmallVecVal I0UnavOps, I1UnavOps;
+    allOperandsAvailable(I0, SinkPt, I0UnavOps);
+    allOperandsAvailable(I1, SinkPt, I1UnavOps);
+
+    // Sink only when all operands are available, or when at most one operand
+    // is unavailable in each instruction.
+    if (I0UnavOps.size() != I1UnavOps.size() || I1UnavOps.size() > 1)
+      continue;
+
+    // Keep I0 and remove I1 and PN.
+    PHINode *PN = cast<PHINode>(I0->user_back());
+    MemoryAccess *I0MemAccess = MSSA->getMemoryAccess(I0);
+    LLVM_DEBUG(dbgs() << "\nSinking:" << *I0 << *I1 << *PN);
+
+    updateAlignment(I1, I0);
+    I0->andIRFlags(I1);
+    combineKnownMetadata(I0, I1);
+    // I1->replaceAllUsesWith(I0) is not required because I1's only use is PN.
+    // Also invalidate the Alias Analysis cache.
+    MD->removeInstruction(I1);
+    MD->removeInstruction(PN);
+    PN->replaceAllUsesWith(I0);
+    // FIXME: Update Memory SSA:
+    // - Update the uses of the old MSSA access with I0.
+    // - Remove MSSA for I1.
+    // - Remove MSSA for PN, and replace with I0.
+    // - Combine metadata of I0 and I1.
+    // - Memory PHI?
+    // assert(0);
+    bool found = false;
+    auto It = SinkPt->begin();
+    while (It != SinkPt->end()) { // Find the last PHI to insert I0.
+      if (!isa<PHINode>(&*It))
+        break;
+      if (PN == &*It)
+        found = true;
+      ++It;
+    }
+    assert(found && "PHI replaced in wrong BB");
+    BasicBlock *I0BB = I0->getParent();
+    I0->moveBefore(*SinkPt, It);
+
+    if (I0UnavOps.empty())
+      PN->eraseFromParent();
+    else {
+      Value *V = I0UnavOps[0];
+      PHINode *NewPN = PHINode::Create(V->getType(), 2, "", PN);
+      PN->eraseFromParent();
+      // Move the incoming operands in this PHI.
+      NewPN->addIncoming(I0UnavOps[0], I0BB);
+      NewPN->addIncoming(I1UnavOps[0], I1->getParent());
+      bool OpReplaced = false;
+      for (unsigned i = 0, e = I0->getNumOperands(); i != e; ++i)
+        if (I0->getOperand(i) == I0UnavOps[0]) {
+          I0->setOperand(i, NewPN);
+          OpReplaced = true;
+        }
+      assert(OpReplaced && "Illegal transformation");
+    }
-      // Move the instruction at the end of HoistPt.
-      Instruction *Last = DestBB->getTerminator();
-      MD->removeInstruction(Repl);
-      Repl->moveBefore(Last);
-
-      DFSNumber[Repl] = DFSNumber[Last]++;
-    }
+    if (I0MemAccess) { // I0, I1 are mem-refs.
+      assert(MSSA->getMemoryAccess(I1));
+      // This MSSA update is only for mem-refs with one use in a PHI in Succ BB.
+      MemoryAccess *I1MemAccess = MSSA->getMemoryAccess(I1);
+      MemoryUseOrDef *I0UD = cast<MemoryUseOrDef>(I0MemAccess);
+      MemoryUseOrDef *I1UD = cast<MemoryUseOrDef>(I1MemAccess);
+
+      MemoryAccess *I0Def = I0UD->getDefiningAccess();
+      MemoryAccess *I1Def = I1UD->getDefiningAccess();
+
+      MemoryPhi *NewPHI = MSSA->getMemoryAccess(SinkPt);
+      if (!NewPHI) {
+        NewPHI = MSSA->createMemoryPhi(SinkPt);
+        NewPHI->addIncoming(I0Def, I0->getParent());
+        NewPHI->addIncoming(I1Def, I1->getParent());
+      }
+      I0UD->setDefiningAccess(NewPHI);
+      MSSAUpdater->removeMemoryAccess(I0MemAccess);
+      MSSAUpdater->removeMemoryAccess(I1MemAccess);
+    }
-      NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
+    I1->eraseFromParent();
+    NumSunk++;
+    updateLocalStats(I0, NI, NL, NS, NC);
+  }
+  return {NI, NL + NC + NS};
+}
-      if (isa<LoadInst>(Repl))
-        ++NL;
-      else if (isa<StoreInst>(Repl))
-        ++NS;
-      else if (isa<CallInst>(Repl))
-        ++NC;
-      else // Scalar
-        ++NI;
+bool GVNHoist::getVN(Instruction *I) {
+  // Do not value number terminator instructions.
+  if (I->isTerminator())
+    return false;
+
+  if (auto *Load = dyn_cast<LoadInst>(I))
+    LI.insert(Load, VN);
+  else if (auto *Store = dyn_cast<StoreInst>(I))
+    SI.insert(Store, VN);
+  else if (auto *Call = dyn_cast<CallInst>(I)) {
+    if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
+      if (isa<DbgInfoIntrinsic>(Intr) ||
+          Intr->getIntrinsicID() == Intrinsic::assume ||
+          Intr->getIntrinsicID() == Intrinsic::sideeffect)
+        return true;
+    }
+    if (Call->mayHaveSideEffects())
+      return false;
+    if (Call->isConvergent())
+      return false;
+    CI.insert(Call, VN);
+  } else if (HoistingGeps || !isa<GetElementPtrInst>(I))
+    // Do not hoist scalars past calls that may write to memory because
+    // that could result in spills later. geps are handled separately.
+    // TODO: We can relax this for targets like AArch64 as they have more
+    // registers than X86.
+    II.insert(I, VN);
+
+  return true;
+}
-      NumHoisted += NL + NS + NC + NI;
-      NumRemoved += NR;
-      NumLoadsHoisted += NL;
-      NumStoresHoisted += NS;
-      NumCallsHoisted += NC;
-      return {NI, NL + NC + NS};
-    }
+void GVNHoist::barriersAndVNs(Function &F, bool Sinking) {
+  for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+    int InstructionNb = 0;
+    bool BarrierFound = false;
+    // Note: the element type below is a reconstruction; the original
+    // declaration was garbled in this copy of the patch.
+    SmallVector<std::pair<VNType, Instruction *>, 4> CurrentBBVNs;
+    for (Instruction &I1 : *BB) {
+      // If I1 cannot guarantee progress, subsequent instructions
+      // in BB cannot be hoisted anyway.
+      // TODO: For the sink operation anything after a hoist barrier is okay
+      // to sink, but nothing before the hoist barrier.
+      if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
+        // NumBarriers++;
+        HoistBarrier.insert(BB);
+        BarrierFound = true;
+        break;
+      }
-  // Hoist all expressions. Returns Number of scalars hoisted
-  // and number of non-scalars hoisted.
-  std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
-    InsnInfo II;
-    LoadInfo LI;
-    StoreInfo SI;
-    CallInfo CI;
-    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
-      int InstructionNb = 0;
-      for (Instruction &I1 : *BB) {
-        // If I1 cannot guarantee progress, subsequent instructions
-        // in BB cannot be hoisted anyways.
-        if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
-          HoistBarrier.insert(BB);
+      // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
+      // deeper may increase the register pressure and compilation time.
+      if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
+        break;
+
+      if (!getVN(&I1))
+        break;
+    }
+    if (Sinking && BarrierFound) {
+      // TODO: clear VNs for this basic block.
+      // Instructions after the last barrier can be sunk.
+      for (BasicBlock::reverse_iterator It = BB->rbegin(), Ie = BB->rend();
+           It != Ie; ++It) {
+        if (!isGuaranteedToTransferExecutionToSuccessor(&*It))
+          break;
-          break;
-        }
-        // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
-        // deeper may increase the register pressure and compilation time.
-        if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
-          break;
+        if (!getVN(&*It))
+          break;
+      }
+    }
+  }
+}
-        // Do not value number terminator instructions.
-        if (I1.isTerminator())
-          break;
+std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) {
+  barriersAndVNs(F, false);
+  HoistingPointList HPL;
+  computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
+  computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
+  computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
+  computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
+  computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
+  computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
+  return hoist(HPL);
+}
+
+std::pair<unsigned, unsigned> GVNHoist::sinkExpressions(Function &F) {
+  // TODO: Pass Sinking = true once barriersAndVNs clears the VNs collected
+  // before a barrier in each basic block (see the TODO there).
+  barriersAndVNs(F, false);
+  std::pair<unsigned, unsigned> Count;
+  bool ContinueSink = false;
+  unsigned MaxSinkIter = MaxSinkChainLength;
+  unsigned SinkIter = 0;
+  do {
+    // Iteratively sink as long as there are candidates. Bail out after
+    // MaxSinkChainLength iterations.
+    HoistingPointList HPL;
+    findSinkableInsn(II.getVNTable(), HPL, InsKind::Scalar);
+    findSinkableInsn(LI.getVNTable(), HPL, InsKind::Load);
+    findSinkableInsn(SI.getVNTable(), HPL, InsKind::Store);
+    findSinkableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar);
+    findSinkableInsn(CI.getLoadVNTable(), HPL, InsKind::Load);
+    findSinkableInsn(CI.getStoreVNTable(), HPL, InsKind::Store);
+
+    auto ThisCount = sink(HPL);
+    Count.first += ThisCount.first;
+    Count.second += ThisCount.second;
+    unsigned SunkCount = ThisCount.first + ThisCount.second;
+    ++SinkIter;
+    ContinueSink = SunkCount > 0 && SinkIter < MaxSinkIter;
+  } while (ContinueSink);
+  return Count;
+}
-        if (auto *Load = dyn_cast<LoadInst>(&I1))
-          LI.insert(Load, VN);
-        else if (auto *Store = dyn_cast<StoreInst>(&I1))
-          SI.insert(Store, VN);
-        else if (auto *Call = dyn_cast<CallInst>(&I1)) {
-          if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
-            if (isa<DbgInfoIntrinsic>(Intr) ||
-                Intr->getIntrinsicID() == Intrinsic::assume ||
-                Intr->getIntrinsicID() == Intrinsic::sideeffect)
-              continue;
+void GVNHoist::constructDJGraph(DomLevelsT &DomLevels, DenseSet<EdgeT> &JEdges,
+                                BBLevelT &BBLevels) {
+  for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode());
+       DFI != DFE; ++DFI) {
+    // getPathLength() counts both endpoints of the path (the entry node and
+    // *DFI), so subtract 1 to get the depth of *DFI.
+    unsigned Depth = DFI.getPathLength() - 1;
+    BasicBlock *BB = (*DFI)->getBlock();
+    DomLevels[BB] = Depth;
+    BBLevels[Depth].push_back(BB);
+    for (BasicBlock *Succ : successors(BB))
+      if (!DT->properlyDominates(BB, Succ)) {
+        JEdges.insert(std::make_pair(BB, Succ));
+      }
+  }
+}
+
+bool GVNHoist::constructMergeSet(DomLevelsT &DomLevels, DenseSet<EdgeT> &JEdges,
+                                 BBLevelT &BBLevels) {
+  bool Repeat = false;
+  DenseSet<EdgeT> VisJEdges; // Visited J edges.
+  unsigned PrevLev = 0;
+  for (auto &P : BBLevels) {
+    assert(PrevLev <= P.first);
+    PrevLev = P.first;
+    for (BasicBlock *CurrBB : P.second) {
+      for (auto PB = pred_begin(CurrBB), PE = pred_end(CurrBB); PB != PE;
+           ++PB) {
+        EdgeT Edge(*PB, CurrBB); // For all incoming edges to CurrBB.
+        if (JEdges.count(Edge) && !VisJEdges.count(Edge)) {
+          VisJEdges.insert(Edge); // Visit.
+          BasicBlock *Src = Edge.first;
+          BasicBlock *Dst = Edge.second;
+          BasicBlock *INode = nullptr;
+          MergeSet[Dst].insert(Dst); // The target of the JEdge.
+          while (DomLevels[Src] >= DomLevels[Dst]) { // A backedge.
+            LLVM_DEBUG(dbgs() << "\nVisiting: " << Src->getName() << " -> "
+                              << Dst->getName());
+            // Merge (tmp) = Merge (tmp) U Merge (tnode) U { tnode }
+            // MergeSet(tnode) contains tnode.
+            MergeSet[Src].insert(MergeSet[Dst].begin(), MergeSet[Dst].end());
+            INode = Src;
+            LLVM_DEBUG(dbgs() << ", IDom of " << Src->getName() << " is ");
+            Src = DT->getNode(Src)->getIDom()->getBlock();
+            LLVM_DEBUG(dbgs() << Src->getName());
+          }
-          if (Call->mayHaveSideEffects())
-            break;
-
-          if (Call->isConvergent())
-            break;
-
-          CI.insert(Call, VN);
-        } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
-          // Do not hoist scalars past calls that may write to memory because
-          // that could result in spills later. geps are handled separately.
-          // TODO: We can relax this for targets like AArch64 as they have more
-          // registers than X86.
-          II.insert(&I1, VN);
+          for (auto PINode = pred_begin(INode), PENode = pred_end(INode);
+               PINode != PENode; ++PINode) { // INode is an ancestor of SNode.
+            EdgeT Edge(*PINode, INode);
+            if (VisJEdges.count(Edge)) {
+              assert(JEdges.count(Edge));
+              BasicBlock *SNode = *PINode;
+              // Check for inconsistency: MergeSet[Dest] must be a subset of
+              // MergeSet[Src].
+              if (!isSubset(MergeSet[INode], MergeSet[SNode]))
+                Repeat = true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return Repeat;
+}
-      HoistingPointList HPL;
-      computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
-      computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
-      computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
-      computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
-      computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
-      computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
-      return hoist(HPL);
+const Instruction *GVNHoist::lastUser(const Instruction *I,
+                                      const Value *Val) const {
+  // TODO: Make isLiveOutUsingMergeSet take const parameters.
+  assert(!isLiveOutUsingMergeSet(const_cast<BasicBlock *>(I->getParent()),
+                                 const_cast<Value *>(Val)));
+  const BasicBlock *BB = I->getParent();
+  BasicBlock::const_iterator BI(I), BE = BB->end();
+  unsigned ICount = std::distance(BI, BE);
+  // If there are fewer uses than ICount, it is cheaper to iterate over
+  // Val->users().
+  if (Val->getNumUses() <= ICount) { // Iterate on uses.
+    for (const User *U : Val->users()) {
+      // The user may be a GEP constant expression, which is not an
+      // instruction.
+      if (const Instruction *UserI = dyn_cast<Instruction>(U)) {
+        if (UserI != I && UserI->getParent() == BB) {
+          if (firstInBB(I, UserI)) // I precedes another use => not a kill.
+            return UserI;
+        }
+      }
+    }
+    return I;
+  }
-  }
-};
+  // else iterate on instructions.
+  for (++BI; BI != BE; ++BI) {
+    for (unsigned i = 0; i < BI->getNumOperands(); ++i)
+      if (BI->getOperandUse(i).get() == Val)
+        return &*BI;
+  }
+  return I;
+}
+
+bool GVNHoist::isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const {
+  assert(BB);
+  const BasicBlock *ValDefBB = nullptr; // BasicBlock defining Val.
+  if (Instruction *I = dyn_cast<Instruction>(Val))
+    ValDefBB = I->getParent();
+  // FIXME!
+  // We are assuming that when ValDefBB is null the value is a parameter.
+
+  // When Val is defined in BB, if any of its uses is outside BB (the DefBB)
+  // then it must be live-out.
+  if (ValDefBB == BB)
+    for (User *U : Val->users()) {
+      if (cast<Instruction>(U)->getParent() != BB)
+        return true;
+    }
+
+  // Mr(n) = M(n) U {n}; Create a new set from the merge set.
+  // Ms(n) = Ms(n) U Mr(w); For each successor w of BB.
+  SmallSetBB Ms; // Ms = null-set
+  for (BasicBlock *Succ : successors(BB)) {
+    Ms.insert(Succ); // Mr(Succ) = Succ U M(Succ)
+    for (BasicBlock *MBB : MergeSet.lookup(Succ))
+      Ms.insert(MBB); // M(Succ)
+  }
+
+  // Iterate over all the uses of Val; if any of its users is in the merge set
+  // of \p BB then Val is live-out from BB.
+  for (User *U : Val->users()) {
+    BasicBlock *UserDefBB = nullptr;
+    if (Instruction *I = dyn_cast<Instruction>(U))
+      UserDefBB = I->getParent();
+    else // Assuming live-out conservatively, the user can be a global.
+         // TODO: maybe returning false is okay here?
+      return true; // llvm_unreachable("User is not an instruction.");
+    // For globals, skip the MergeSet checks.
+    if (UserDefBB->getParent() != BB->getParent())
+      continue;
+    while (UserDefBB && (UserDefBB != ValDefBB)) {
+      if (Ms.count(UserDefBB)) // if t /\ Ms(n) then return true;
+        return true;
+      auto N = DT->getNode(UserDefBB);
+      if (auto IDom = N->getIDom())
+        UserDefBB = IDom->getBlock();
+      else // Reached the beginning of the function => maybe a global.
+        return false; // Assume that a global does not flow live-out from BB.
+    }
+  }
+  return false;
+}
 
 class GVNHoistLegacyPass : public FunctionPass {
 public:
diff --git a/llvm/test/Transforms/GVNHoist/broken-mod-sink.ll b/llvm/test/Transforms/GVNHoist/broken-mod-sink.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVNHoist/broken-mod-sink.ll
@@ -0,0 +1,100 @@
+; RUN: opt -S -gvn-hoist -gvn-hoist-check-profitability=false < %s | FileCheck %s
+
+; Check that the and, inttoptr, and bitcast are all sunk.
+; CHECK-LABEL: @baz0
+; CHECK-LABEL: bb9:
+; CHECK: and
+; CHECK: inttoptr
+; CHECK: bitcast
+
+
+; ModuleID = 'broken-mod.ll'
+
+%struct.pluto = type { i32 (...)**, i8, i8, i16, i32 }
+
+define void @baz0(i64 %a) {
+bb:
+  br i1 undef, label %bb1, label %bb4
+
+bb1:                                              ; preds = %bb
+  %tmp = and i64 %a, -8
+  %tmp2 = inttoptr i64 %tmp to i8*
+  %tmp3 = bitcast i8* %tmp2 to %struct.pluto*
+  br label %bb9
+
+bb4:                                              ; preds = %bb
+  br i1 undef, label %bb5, label %bb11
+
+bb5:                                              ; preds = %bb4
+  %tmp6 = and i64 %a, -8
+  %tmp7 = inttoptr i64 %tmp6 to i8*
+  %tmp8 = bitcast i8* %tmp7 to %struct.pluto*
+  br label %bb9
+
+bb9:                                              ; preds = %bb5, %bb1
+  %tmp10 = phi %struct.pluto* [ %tmp8, %bb5 ], [ %tmp3, %bb1 ]
+  unreachable
+
+bb11:                                             ; preds = %bb4
+  ret void
+}
+
+; CHECK-LABEL: @baz
+; CHECK-LABEL: bb9:
+; CHECK: bitcast
+; Function Attrs: nounwind uwtable
+define void @baz() {
+bb:
+  %tmp = and i64 undef, -8
+  %tmp2 = inttoptr i64 %tmp to i8*
+  br i1 undef, label %bb1, label %bb4
+
+bb1:                                              ; preds = %bb
+  %tmp3 = bitcast i8* %tmp2 to %struct.pluto*
+  br label %bb9
+
+bb4:                                              ; preds = %bb
+  br i1 undef, label %bb5, label %bb11
+
+bb5:                                              ; preds = %bb4
+  %tmp8 = bitcast i8* %tmp2 to %struct.pluto*
+  br label %bb9
+
+bb9:                                              ; preds = %bb5, %bb1
+  %tmp10 = phi %struct.pluto* [ %tmp8, %bb5 ], [ %tmp3, %bb1 ]
+  unreachable
+
+bb11:                                             ; preds = %bb4
+  ret void
+}
+
+; CHECK-LABEL: @baz1
+; CHECK-LABEL: bb9:
+; CHECK: inttoptr
+; CHECK: bitcast
+define void @baz1() {
+bb:
+  %tmp = and i64 undef, -8
+  br i1 undef, label %bb1, label %bb4
+
+bb1:                                              ; preds = %bb
+  %tmp2 = inttoptr i64 %tmp to i8*
+  %tmp10 = bitcast i8* %tmp2 to %struct.pluto*
+  br label %bb9
+
+bb4:                                              ; preds = %bb
+  br i1 undef, label %bb5, label %bb11
+
+bb5:                                              ; preds = %bb4
+  %tmp3 = inttoptr i64 %tmp to i8*
+  %tmp8 = bitcast i8* %tmp3 to %struct.pluto*
+  br label %bb9
+
+bb9:                                              ; preds = %bb5, %bb1
+  %tmp11 = phi %struct.pluto* [ %tmp8, %bb5 ], [ %tmp10, %bb1 ]
+  unreachable
+
+bb11:                                             ; preds = %bb4
+  ret void
+}
+
diff --git a/llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll b/llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Testcase taken from:
+; Das, Dibyendu, B. Dupont De Dinechin, and Ramakrishna Upadrasta. "Efficient
+; liveness computation using merge sets and dj-graphs." ACM Transactions on
+; Architecture and Code Optimization (TACO) 8.4 (2012): 27.
+
+; Check the J edges.
+; CHECK: Found a JEdge: bb10 -> bb8
+; CHECK: Found a JEdge: bb4 -> bb5
+; CHECK: Found a JEdge: bb5 -> bb6
+; CHECK: Found a JEdge: bb6 -> bb5
+; CHECK: Found a JEdge: bb7 -> bb2
+; CHECK: Found a JEdge: bb9 -> bb6
+
+; Check the merge sets.
+; CHECK: MergeSet of: bb10: bb2, bb8, bb5, bb6,
+; CHECK: MergeSet of: bb2: bb2,
+; CHECK: MergeSet of: bb3: bb2,
+; CHECK: MergeSet of: bb4: bb2, bb5, bb6,
+; CHECK: MergeSet of: bb5: bb2, bb5, bb6,
+; CHECK: MergeSet of: bb6: bb2, bb5, bb6,
+; CHECK: MergeSet of: bb7: bb2,
+; CHECK: MergeSet of: bb8: bb2, bb8, bb5, bb6,
+; CHECK: MergeSet of: bb9: bb2, bb8, bb5, bb6,
+
+
+define void @irreducible_loop() {
+bb1:
+  br label %bb2
+
+bb2:
+  br i1 undef, label %bb3, label %bb11
+
+bb3:
+  br i1 undef, label %bb4, label %bb8
+
+bb4:
+  br label %bb5
+
+bb5:
+  br label %bb6
+
+bb6:
+  br i1 undef, label %bb7, label %bb5
+
+bb7:
+  br label %bb2
+
+bb8:
+  br label %bb9
+
+bb9:
+  br i1 undef, label %bb6, label %bb10
+
+bb10:
+  br label %bb8
+
+bb11:
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/GVNHoist/dj-edge-detect.ll b/llvm/test/Transforms/GVNHoist/dj-edge-detect.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVNHoist/dj-edge-detect.ll
@@ -0,0 +1,108 @@
+; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Check the J edges.
+; CHECK: Found a JEdge: bb10 -> bb18
+; CHECK: Found a JEdge: bb13 -> bb18
+; CHECK: Found a JEdge: bb15 -> bb18
+; CHECK: Found a JEdge: bb18 -> bb6
+; CHECK: Found a JEdge: bb2 -> bb3
+; CHECK: Found a JEdge: bb8 -> bb18
+
+
+; Check the merge sets.
+; CHECK: MergeSet of: bb10: bb3, bb6, bb18,
+; CHECK: MergeSet of: bb13: bb3, bb6, bb18,
+; CHECK: MergeSet of: bb15: bb3, bb6, bb18,
+; CHECK: MergeSet of: bb18: bb3, bb6, bb18,
+; CHECK: MergeSet of: bb2: bb3,
+; CHECK: MergeSet of: bb3: bb3,
+; CHECK: MergeSet of: bb6: bb3, bb6,
+; CHECK: MergeSet of: bb8: bb3, bb6, bb18,
+
+
+%0 = type { %1*, %8* }
+%1 = type { %2*, %3* }
+%2 = type { i32, i32, i32, i32, i32*, i8*, i32 }
+%3 = type { %4, i32, i32, %7*, i8, i8, i8, i8, i8 }
+%4 = type { i8, i32, i8*, %5 }
+%5 = type { %6 }
+%6 = type { i16 }
+%7 = type { i8*, i8*, i8*, i8*, i8* }
+%8 = type <{ i8*, i8*, i32, %9, %17*, %19* }>
+%9 = type { %10*, i8*, i8*, i8* (i8*)*, i32* (i8*)*, i8*, %15, i32, i32, i8*, %16*, i32, i8, i8*, i8, i8* }
+%10 = type { i8*, %11*, i8*, %12, %13*, i8*, i32 (i8*)*, i32, i8, %14 }
+%11 = type { i8*, i32 (i8*, %12*, i8*)*, i32 (i8*)*, i32 (i8*, i32)*, %16* (i8*)*, i32 (i8*)*, i32 (i8*)*, i32 (...)*, i32 (...)*, i32 (...)*, void (i8*, i32)*, void (i8*, i32, i8*, i32)*, i32 (i8*, i32, i8*)* }
+%12 = type { i8*, i32 (i8*, i32, i8*, i32, i8)*, i32 (i8*, i32, i8*)*, void (i8*, i8*, i32)*, i32 (i8*)*, i32 (i8*, i32, i32)* }
+%13 = type { i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)* } +%14 = type { i32, i32, i8, i8, i8 } +%15 = type { i32, i32, i32 } +%16 = type { i16, i16, i32, i32 } +%17 = type { i8*, i8*, i16 (i8*, i8*, i16)*, i32 (i8*, i8, i32)*, i32 (%16*, i32)*, %18, i32, i32 } +%18 = type { i8*, i8* (i8*, i8*, i32)* } +%19 = type { %20, i32, i32 } +%20 = type { i32, i32, [8 x i8], i8, i8, i16 } + +@global = external hidden unnamed_addr constant [10 x i8], align 1 + +define void @ham(i8* nocapture readnone %arg) local_unnamed_addr { +bb: + %tmp = alloca %0*, align 8 + %tmp1 = bitcast %0** %tmp to i8* + br label %bb3 + +bb2: ; preds = %bb6 + br label %bb3 + +bb3: ; preds = %bb2, %bb + %tmp4 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp5 = call i32 @quux(i8* %tmp1) + br label %bb6 + +bb6: ; preds = %bb18, %bb3 + %tmp7 = phi i32 [ %tmp5, %bb3 ], [ %tmp19, %bb18 ] + switch i32 %tmp7, label %bb15 [ + i32 0, label %bb2 + i32 35, label %bb8 + i32 11, label %bb10 + i32 12, label %bb13 + ] + +bb8: ; preds = %bb6 + %tmp9 = load %0*, %0** %tmp, align 8, !tbaa !0 + br label %bb18 + +bb10: ; preds = %bb6 + %tmp11 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp12 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb13: ; preds = %bb6 + %tmp14 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb15: ; preds = %bb6 + %tmp16 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp17 = call i32 @quux(i8* null) + br label %bb18 + +bb18: ; preds = %bb15, %bb13, %bb10, %bb8 + %tmp19 = phi i32 [ %tmp17, %bb15 ], [ %tmp14, %bb13 ], [ %tmp12, %bb10 ], [ 0, %bb8 ] + br label %bb6 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +declare i32 @zot(%0**) + +declare i32 @quux(i8*) + +declare i32 @wombat(i8* nocapture readonly, ...) + +attributes #0 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"any pointer", !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/GVNHoist/gvn-sink-test-1.ll b/llvm/test/Transforms/GVNHoist/gvn-sink-test-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GVNHoist/gvn-sink-test-1.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -gvn-hoist -debug < %s +; ModuleID = 'gvn-sink-test.ll' +source_filename = "bugpoint-output-9bbed91.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.spam = type opaque + +declare void @bar() local_unnamed_addr #0 + +; Function Attrs: noinline nounwind uwtable +define fastcc void @wombat() unnamed_addr #1 { +bb: + br i1 undef, label %bb1, label %bb10 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + %tmp = load %struct.spam*, %struct.spam** undef, align 8 + br label %bb7 + +bb5: ; preds = %bb3 + %tmp6 = load %struct.spam*, %struct.spam** undef, align 8 + br label %bb7 + +bb7: ; preds = %bb5, %bb4 + %tmp8 = phi %struct.spam* [ %tmp, %bb4 ], [ %tmp6, %bb5 ] + call void @bar() #2 + %tmp9 = load %struct.spam*, %struct.spam** undef, align 8 + unreachable + +bb10: ; preds = %bb + ret void +} + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} diff --git a/llvm/test/Transforms/GVNHoist/hoist-convergent.ll b/llvm/test/Transforms/GVNHoist/hoist-convergent.ll --- a/llvm/test/Transforms/GVNHoist/hoist-convergent.ll +++ b/llvm/test/Transforms/GVNHoist/hoist-convergent.ll @@ -1,4 +1,4 @@ -; RUN: opt -gvn-hoist -S < %s | FileCheck %s +; RUN: opt -gvn-hoist -gvn-hoist-check-profitability=false -S < %s | FileCheck %s ; Check that convergent calls are not hoisted. ; diff --git a/llvm/test/Transforms/GVNHoist/hoist-md.ll b/llvm/test/Transforms/GVNHoist/hoist-md.ll --- a/llvm/test/Transforms/GVNHoist/hoist-md.ll +++ b/llvm/test/Transforms/GVNHoist/hoist-md.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -gvn-hoist < %s | FileCheck %s +; RUN: opt -S -gvn-hoist -gvn-hoist-check-profitability=false < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/GVNHoist/hoist-mssa.ll b/llvm/test/Transforms/GVNHoist/hoist-mssa.ll --- a/llvm/test/Transforms/GVNHoist/hoist-mssa.ll +++ b/llvm/test/Transforms/GVNHoist/hoist-mssa.ll @@ -1,7 +1,10 @@ -; RUN: opt -S -gvn-hoist -newgvn < %s | FileCheck %s +; RUN: opt -S -gvn-hoist -gvn-hoist-check-profitability=false -newgvn < %s | FileCheck %s -; Check that store hoisting works: there should be only one store left. -; CHECK-LABEL: @getopt +; Check that store hoisting works: there should be only one store hoisted +; Hoist load as well because the @optind global is killed in bb4 (at least once) +; CHECK-LABEL: @getopt( +; CHECK: load i32 +; CHECK: add nsw i32 ; CHECK: store i32 ; CHECK-NOT: store i32 @@ -47,6 +50,51 @@ ret void } +; Check that store hoisting works: there should be only one store left because +; load should also be hoisted in this case. +; CHECK-LABEL: @getopt_hoist_load( +; CHECK: store i32 +; CHECK-NOT: store i32 +define void @getopt_hoist_load(i32* %p1) { +bb: + br label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb13 + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb9 + +bb4: ; preds = %bb3 + %tmp = load i32, i32* @optind, align 4 + br i1 undef, label %bb5, label %bb7 + +bb5: ; preds = %bb4 + %tmp6 = add nsw i32 %tmp, 1 + store i32 %tmp6, i32* %p1, align 4 + br label %bb12 + +bb7: ; preds = %bb4 + %tmp8 = add nsw i32 %tmp, 1 + store i32 %tmp8, i32* %p1, align 4 + br label %bb13 + +bb9: ; preds = %bb3 + %tmp10 = load i32, i32* @optind, align 4 + %tmp11 = add nsw i32 %tmp10, 1 + store i32 %tmp11, i32* %p1, align 4 + br label %bb12 + +bb12: ; preds = %bb9, %bb5 + br label %bb13 + +bb13: ; preds = %bb12, %bb7, %bb2 + ret void +} + @GlobalVar = internal global float 1.000000e+00 ; Check that we hoist stores and remove the MSSA phi node. 
diff --git a/llvm/test/Transforms/GVNHoist/hoist-pr20242.ll b/llvm/test/Transforms/GVNHoist/hoist-pr20242.ll
--- a/llvm/test/Transforms/GVNHoist/hoist-pr20242.ll
+++ b/llvm/test/Transforms/GVNHoist/hoist-pr20242.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn-hoist -newgvn -gvn-hoist -S < %s | FileCheck %s
+; RUN: opt -gvn-hoist -newgvn -gvn-hoist -gvn-hoist-check-profitability=false -S < %s | FileCheck %s
 
 ; Test to demonstrate that newgvn creates opportunities for
 ; more gvn-hoist when sibling branches contain identical expressions.
diff --git a/llvm/test/Transforms/GVNHoist/hoist-pr22005.ll b/llvm/test/Transforms/GVNHoist/hoist-pr22005.ll
--- a/llvm/test/Transforms/GVNHoist/hoist-pr22005.ll
+++ b/llvm/test/Transforms/GVNHoist/hoist-pr22005.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+; RUN: opt -gvn-hoist -gvn-hoist-check-profitability=false -S < %s | FileCheck %s
 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/GVNHoist/hoist-recursive-geps.ll b/llvm/test/Transforms/GVNHoist/hoist-recursive-geps.ll
--- a/llvm/test/Transforms/GVNHoist/hoist-recursive-geps.ll
+++ b/llvm/test/Transforms/GVNHoist/hoist-recursive-geps.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn-hoist -newgvn -gvn-hoist -S < %s | FileCheck %s
+; RUN: opt -gvn-hoist -newgvn -gvn-hoist -gvn-hoist-check-profitability=false -S < %s | FileCheck %s
 
 ; Check that recursive GEPs are hoisted. Since hoisting creates
 ; fully redundant instructions, newgvn is run to remove them which then
diff --git a/llvm/test/Transforms/GVNHoist/hoist-very-busy.ll b/llvm/test/Transforms/GVNHoist/hoist-very-busy.ll
--- a/llvm/test/Transforms/GVNHoist/hoist-very-busy.ll
+++ b/llvm/test/Transforms/GVNHoist/hoist-very-busy.ll
@@ -53,3 +53,25 @@
   store i32 1, i32* @G
   ret void
 }
+
+
+; Check that the stores are hoisted and merged into one.
+; CHECK-LABEL: define void @bun(
+; CHECK: store
+; CHECK-NOT: store
+
+define void @bun() {
+entry:
+  br label %if.then
+
+if.then:                                          ; preds = %entry
+  br i1 undef, label %sw0, label %sw1
+
+sw0:
+  store i32 1, i32* @G
+  unreachable
+
+sw1:
+  store i32 1, i32* @G
+  ret void
+}
diff --git a/llvm/test/Transforms/GVNHoist/hoist.ll b/llvm/test/Transforms/GVNHoist/hoist.ll
--- a/llvm/test/Transforms/GVNHoist/hoist.ll
+++ b/llvm/test/Transforms/GVNHoist/hoist.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+; RUN: opt -gvn-hoist -gvn-hoist-check-profitability=false -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/GVNHoist/infinite-loop-direct.ll b/llvm/test/Transforms/GVNHoist/infinite-loop-direct.ll
--- a/llvm/test/Transforms/GVNHoist/infinite-loop-direct.ll
+++ b/llvm/test/Transforms/GVNHoist/infinite-loop-direct.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -gvn-hoist < %s | FileCheck %s
+; RUN: opt -S -gvn-hoist -gvn-hoist-check-profitability=false < %s | FileCheck %s
 
 ; Checking gvn-hoist in case of infinite loops and irreducible control flow.
 
diff --git a/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll b/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll
--- a/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll
+++ b/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -gvn-hoist < %s | FileCheck %s
+; RUN: opt -S -gvn-hoist -gvn-hoist-check-profitability=false < %s | FileCheck %s
 
 ; Checking gvn-hoist in case of indirect branches.
diff --git a/llvm/test/Transforms/GVNHoist/int_sideeffect.ll b/llvm/test/Transforms/GVNHoist/int_sideeffect.ll --- a/llvm/test/Transforms/GVNHoist/int_sideeffect.ll +++ b/llvm/test/Transforms/GVNHoist/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -gvn-hoist | FileCheck %s +; RUN: opt -S < %s -gvn-hoist -gvn-hoist-check-profitability=false | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/GVNHoist/pr37445.ll b/llvm/test/Transforms/GVNHoist/pr37445.ll --- a/llvm/test/Transforms/GVNHoist/pr37445.ll +++ b/llvm/test/Transforms/GVNHoist/pr37445.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -gvn-hoist -S | FileCheck %s +; RUN: opt < %s -early-cse-memssa -gvn-hoist -gvn-hoist-check-profitability=false -S | FileCheck %s ; Make sure opt won't crash and that this pair of ; instructions (load, icmp) is hoisted successfully diff --git a/llvm/test/Transforms/GVNHoist/sink-load-dom-numbering.ll b/llvm/test/Transforms/GVNHoist/sink-load-dom-numbering.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GVNHoist/sink-load-dom-numbering.ll @@ -0,0 +1,58 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; CHECK: load +; CHECK-NOT: load + +%struct.blam = type { %struct.wibble } +%struct.wibble = type { i8*, i32 } + +declare void @hoge() + +define void @eggs() { +bb: + br label %bb1 + +bb1: ; preds = %bb + br label %bb2 + +bb2: ; preds = %bb1 + br i1 undef, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + unreachable + +bb4: ; preds = %bb2 + br label %bb5 + +bb5: ; preds = %bb4 + br i1 undef, label %bb6, label %bb9 + +bb6: ; preds = %bb5 + br i1 undef, label %bb8, label %bb7 + +bb7: ; preds = %bb6 + call void @hoge() + br label %bb8 + +bb8: ; preds = %bb7, %bb6 + %tmp = load %struct.blam*, %struct.blam** undef, align 8, !tbaa !1 + br label %bb11 + +bb9: ; preds = %bb5 + %tmp10 = load %struct.blam*, %struct.blam** undef, align 8, !tbaa !1 + br label %bb11 + +bb11: ; preds = %bb9, %bb8 + %tmp12 = phi %struct.blam* [ %tmp, %bb8 ], [ %tmp10, %bb9 ] + unreachable +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !3, i64 8} +!2 = !{!"tree_common", !3, i64 0, !3, i64 8, !6, i64 16, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!"int", !4, i64 0} diff --git a/llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-1.ll b/llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-1.ll @@ -0,0 +1,57 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Check that load is sunk to its successor. 
+; CHECK: load
+; CHECK-NOT: load
+
+@reg_last_set_invalid = external global i8*, align 8
+
+; Function Attrs: nounwind uwtable
+define void @record_value_for_reg() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %entry
+  br label %for.cond139
+
+for.cond139:                                      ; preds = %if.end156, %for.cond
+  br i1 undef, label %for.body142, label %for.end159
+
+for.body142:                                      ; preds = %for.cond139
+  store i32 undef, i32* undef, align 4, !tbaa !1
+  br i1 undef, label %land.lhs.true146, label %if.else
+
+land.lhs.true146:                                 ; preds = %for.body142
+  br i1 undef, label %if.then151, label %if.else
+
+if.then151:                                       ; preds = %land.lhs.true146
+  %0 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !5
+  br label %if.end156
+
+if.else:                                          ; preds = %land.lhs.true146, %for.body142
+  %1 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !5
+  br label %if.end156
+
+if.end156:                                        ; preds = %if.else, %if.then151
+  %.sink5 = phi i8* [ %1, %if.else ], [ %0, %if.then151 ]
+  br label %for.cond139
+
+for.end159:                                       ; preds = %for.cond139
+  br i1 undef, label %land.lhs.true161, label %if.end174
+
+land.lhs.true161:                                 ; preds = %for.end159
+  br label %if.end174
+
+if.end174:                                        ; preds = %land.lhs.true161, %for.end159
+  unreachable
+}
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !3, i64 0}
diff --git a/llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-2.ll b/llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-2.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -gvn-hoist < %s | FileCheck %s
+
+; CHECK: load
+; CHECK-NOT: load
+@reg_last_set_invalid = external global i8*, align 8
+
+; Function Attrs: nounwind uwtable
+define void @record_value_for_reg() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %entry
+  br label %for.cond139
+
+for.cond139:                                      ; preds = %if.end156, %for.cond
+  br label %for.body142
+
+for.body142:                                      ; preds = %for.cond139
+  br i1 undef, label %land.lhs.true146, label %if.else
+
+land.lhs.true146:                                 ; preds = %for.body142
+  br i1 undef, label %if.then151, label %if.else
+
+if.then151:                                       ; preds = %land.lhs.true146
+  %0 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !1
+  br label %if.end156
+
+if.else:                                          ; preds = %land.lhs.true146, %for.body142
+  %1 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !1
+  br label %if.end156
+
+if.end156:                                        ; preds = %if.else, %if.then151
+  %.sink5 = phi i8* [ %1, %if.else ], [ %0, %if.then151 ]
+  store i8 undef, i8* undef, align 1, !tbaa !5
+  br label %for.cond139
+}
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!3, !3, i64 0}
diff --git a/llvm/test/Transforms/GVNHoist/sink-mul-with-liveout.ll b/llvm/test/Transforms/GVNHoist/sink-mul-with-liveout.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVNHoist/sink-mul-with-liveout.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -gvn-hoist < %s | FileCheck %s
+
+; Check that a mul is sunk to the last basic block.
+; CHECK-LABEL: @foo
+; CHECK: if.then:
+; CHECK: mul13
+; CHECK-LABEL: if.else:
+; CHECK: mul21
+; CHECK-LABEL: if.end:
+; CHECK: buf.sroa.14 = phi i32 [ undef, %if.then ], [ %x, %if.else ]
+; CHECK: buf.sroa.16 = phi i32 [ 1, %if.then ], [ %mul21, %if.else ]
+; CHECK: phi i32 [ %mul13, %if.then ], [ %mul21, %if.else ]
+; CHECK: mul nsw i32 {{.*}}, %z
+
+define void @foo(i32 %x, i32 %y, i32 %z, i32 %w, i1 %interleaved) {
+entry:
+  br i1 %interleaved, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %mul13 = mul nsw i32 %x, %y
+  %mul14 = mul nsw i32 %mul13, %z
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %mul21 = mul nsw i32 %x, %y
+  %mul25 = mul nsw i32 %mul21, %z
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %buf.sroa.14 = phi i32 [ undef, %if.then ], [ %x, %if.else ]
+  %buf.sroa.16 = phi i32 [ 1, %if.then ], [ %mul21, %if.else ]
+  %buf.sroa.18 = phi i32 [ %mul14, %if.then ], [ %mul25, %if.else ]
+  %conv39 = sext i32 %z to i64
+  ret void
+}
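
Editorial note: for readers unfamiliar with the DJ-graph machinery that constructDJGraph, constructMergeSet, and the dj-edge-detect tests above exercise, the following standalone sketch illustrates the two core ideas: classifying CFG edges into D-edges (the source is the immediate dominator of the target) and J-edges (everything else), and propagating merge sets up the dominator tree along J-edges, in the spirit of Das, Dupont de Dinechin, and Upadrasta (TACO 2012). This is not part of the patch: the CFG, its immediate dominators, and all names here are hardcoded assumptions for illustration, and it does a single simplified pass from the J-edge source up to (but excluding) IDom(target), whereas the patch compares dominator-tree levels and iterates constructMergeSet to a fixed point to handle irreducible graphs.

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Example CFG: a diamond with a back-edge from the join block.
  //   entry -> a, entry -> b, a -> join, b -> join, join -> entry
  std::map<std::string, std::vector<std::string>> Succs = {
      {"entry", {"a", "b"}}, {"a", {"join"}}, {"b", {"join"}},
      {"join", {"entry"}}};
  // Immediate dominators, hardcoded for this CFG (a real pass would
  // compute them); the function entry has no IDom.
  std::map<std::string, std::string> IDom = {
      {"a", "entry"}, {"b", "entry"}, {"join", "entry"}};

  // An edge (S, T) is a J-edge iff S is not the immediate dominator of T.
  std::vector<std::pair<std::string, std::string>> JEdges;
  for (auto &P : Succs)
    for (auto &T : P.second)
      if (IDom.count(T) == 0 || IDom[T] != P.first)
        JEdges.push_back({P.first, T});

  // Merge-set propagation: for a J-edge (S, T), every node on the dominator
  // tree path from S up to (but excluding) IDom(T) absorbs MergeSet(T) u {T}.
  std::map<std::string, std::set<std::string>> MergeSet;
  for (auto &E : JEdges) {
    const std::string &T = E.second;
    std::string Stop = IDom.count(T) ? IDom[T] : std::string();
    for (std::string N = E.first; !N.empty() && N != Stop;
         N = IDom.count(N) ? IDom[N] : std::string()) {
      MergeSet[N].insert(T);
      MergeSet[N].insert(MergeSet[T].begin(), MergeSet[T].end());
    }
  }

  for (auto &E : JEdges)
    std::printf("J-edge: %s -> %s\n", E.first.c_str(), E.second.c_str());
  for (auto &P : MergeSet) {
    std::printf("MergeSet(%s):", P.first.c_str());
    for (auto &B : P.second)
      std::printf(" %s", B.c_str());
    std::printf("\n");
  }
  return 0;
}

On this CFG the sketch classifies a -> join, b -> join, and join -> entry as J-edges, and the resulting merge sets make the liveness query cheap in the same way isLiveOutUsingMergeSet uses them: a value is live-out of a block when one of its users sits in (or dominates into) the merge set of a successor, without running a full backward dataflow pass.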