Index: llvm/include/llvm/Support/GenericDomTree.h =================================================================== --- llvm/include/llvm/Support/GenericDomTree.h +++ llvm/include/llvm/Support/GenericDomTree.h @@ -721,11 +721,12 @@ llvm::make_unique>(BB, IDomNode))).get(); } - NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } - void addRoot(NodeT *BB) { this->Roots.push_back(BB); } public: + + NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } + /// updateDFSNumbers - Assign In and Out numbers to the nodes while walking /// dominator tree in dfs order. void updateDFSNumbers() const { Index: llvm/include/llvm/Support/GenericDomTreeConstruction.h =================================================================== --- llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -269,7 +269,7 @@ } // Free temporary memory used to construct idom's - DT.IDoms.clear(); + //DT.IDoms.clear(); DT.Info.clear(); DT.Vertex.clear(); DT.Vertex.shrink_to_fit(); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -137,7 +137,7 @@ "(default = 75)")); static cl::opt EnableGVNHoist( - "enable-gvn-hoist", cl::init(false), cl::Hidden, + "enable-gvn-hoist", cl::init(true), cl::Hidden, cl::desc("Enable the GVN hoisting pass")); static cl::opt @@ -477,6 +477,10 @@ MPM.add(Inliner); Inliner = nullptr; } + // GVNHoist + if(EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (!DisableUnitAtATime) MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -17,14 +17,57 @@ // is disabled in the following cases. // 1. 
Scalars across calls.
// 2. geps when corresponding load/store cannot be hoisted.
+//
+// TODO:
+// For -Oz scalars are always safe to hoist.
+// For -O2/-O3 hoist only when the live range improves or remains the same.
+// If we haven't computed dominator tree levels, do so now.
+// For the sink operation, anything after a hoist barrier is okay to sink,
+// but nothing before the hoist barrier. See Danny's patch on HoistBarriers.
+//
+// Find sink opportunities: sink instructions to a common post dominator when
+// they compute the same value and hoisting isn't profitable because that will
+// increase the liveness. In this case sinking may decrease the liveness when
+// the instruction can be delayed. But do not delay so much that the latency of
+// the operation takes over. For example, a divide may take ~10 cycles; if it is
+// sunk into the BB where it is used, its result may not be ready in time anyway.
+// So the estimated latency of the operation should be computed, and the
+// instruction should be sunk only to the point where its result is readily
+// available for its user.
+//
+// Sinking a store may not always be beneficial: sinking frees up a register and
+// so improves register allocation, but the stored result may be loaded again;
+// in that case it is better to store early (TODO: verify).
+//
+// TODO: Hoist from >2 successors. Currently GVNHoist will not hoist stores
+// in this case because it works on two instructions at a time. 
+// entry: +// switch i32 %c1, label %exit1 [ +// i32 0, label %sw0 +// i32 1, label %sw1 +// ] +// +// sw0: +// store i32 1, i32* @G +// br label %exit +// +// sw1: +// store i32 1, i32* @G +// br label %exit +// +// exit1: +// store i32 1, i32* @G +// ret void +// exit: +// ret void //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/MemorySSA.h" #include "llvm/Transforms/Utils/MemorySSAUpdater.h" @@ -34,6 +77,7 @@ #define DEBUG_TYPE "gvn-hoist" STATISTIC(NumHoisted, "Number of instructions hoisted"); +STATISTIC(NumSunk, "Number of instructions sunk"); STATISTIC(NumRemoved, "Number of instructions removed"); STATISTIC(NumLoadsHoisted, "Number of loads hoisted"); STATISTIC(NumLoadsRemoved, "Number of loads removed"); @@ -41,6 +85,7 @@ STATISTIC(NumStoresRemoved, "Number of stores removed"); STATISTIC(NumCallsHoisted, "Number of calls hoisted"); STATISTIC(NumCallsRemoved, "Number of calls removed"); +//STATISTIC(NumBarriers, "Number of barriers"); static cl::opt MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), @@ -60,6 +105,9 @@ MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10), cl::desc("Maximum length of dependent chains to hoist " "(default = 10, unlimited = -1)")); +static cl::opt + CheckProfitability("gvn-hoist-check-profitability", cl::Hidden, cl::init(true), + cl::desc("Check for proitability (reducing register pressure)")); namespace llvm { @@ -73,13 +121,6 @@ // Returns true when A executes before B. 
bool operator()(const Instruction *A, const Instruction *B) const { - // FIXME: libc++ has a std::sort() algorithm that will call the compare - // function on the same element. Once PR20837 is fixed and some more years - // pass by and all the buildbots have moved to a corrected std::sort(), - // enable the following assert: - // - // assert(A != B); - const BasicBlock *BA = A->getParent(); const BasicBlock *BB = B->getParent(); unsigned ADFS, BDFS; @@ -114,6 +155,8 @@ VNtoScalars[{V, InvalidVN}].push_back(I); } + void clear() { VNtoScalars.clear(); } + const VNtoInsns &getVNTable() const { return VNtoScalars; } }; @@ -130,6 +173,8 @@ } } + void clear() { VNtoLoads.clear(); } + const VNtoInsns &getVNTable() const { return VNtoLoads; } }; @@ -149,6 +194,8 @@ VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store); } + void clear() { VNtoStores.clear(); } + const VNtoInsns &getVNTable() const { return VNtoStores; } }; @@ -175,6 +222,12 @@ VNtoCallsStores[Entry].push_back(Call); } + void clear() { + VNtoCallsScalars.clear(); + VNtoCallsLoads.clear(); + VNtoCallsStores.clear(); + } + const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } @@ -185,6 +238,13 @@ typedef DenseMap BBSideEffectsSet; typedef SmallVector SmallVecInsn; typedef SmallVectorImpl SmallVecImplInsn; +typedef SmallSet SmallSetBB; +typedef DenseMap MergeSetT; +typedef SmallVector SmallVecBB; +typedef SmallVecBB BBLevelKeyT; +typedef std::map BBLevelT; +typedef DenseMap DomLevelsT; +typedef std::pair EdgeT; static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { static const unsigned KnownIDs[] = { @@ -195,55 +255,123 @@ combineMetadata(ReplInst, I, KnownIDs); } +void printBBLevels(const BBLevelT &BBLevels) { + for (const std::pair &P : BBLevels) { + dbgs() << "\nLevel: " << P.first << "\n"; + for (const BasicBlock *BB : P.second) + dbgs() << *BB << "\n"; + } +} + +void printMergeSet(const 
MergeSetT &M) { + // For printing in a deterministic order. + typedef std::set SetConstBB; + std::map PrintM; + for (const std::pair &P : M) { + for (const BasicBlock *BB : P.second) + PrintM[P.first].insert(BB); + } + for (const std::pair &P : PrintM) { + dbgs() << "\nMergeSet of: " << P.first->getName() << ": "; + for (const BasicBlock *BB : P.second) + dbgs() << BB->getName() << ", "; + } +} + +void printJEdges(const DenseSet &Edges) { + // For printing in a deterministic order. + std::set PrintE(Edges.begin(), Edges.end()); + + for (const EdgeT &E : PrintE) + dbgs() << "\nFound a JEdge: " << E.first->getName() << " -> " + << E.second->getName(); +} + +void printSmallSet(SmallSetBB &S) { + dbgs() << "\nPrinting SmallSet: "; + for (const auto &BB : S) + dbgs() << BB->getName() << ","; +} + +static inline void updateLocalStats(const Instruction* Repl, unsigned &NI, unsigned &NL, unsigned &NS, unsigned &NC) { + if (isa(Repl)) + ++NL; + else if (isa(Repl)) + ++NS; + else if (isa(Repl)) + ++NC; + else // Scalar + ++NI; +} + +static inline void updateGlobalStats(unsigned NI, unsigned NL, unsigned NS, unsigned NC, unsigned NR) { + NumHoisted += NL + NS + NC + NI; + NumRemoved += NR; + NumLoadsHoisted += NL; + NumStoresHoisted += NS; + NumCallsHoisted += NC; +} + // This pass hoists common computations across branches sharing common // dominator. The primary goal is to reduce the code size, and in some // cases reduce critical path (by exposing more ILP). class GVNHoist { public: + // Each element of a hoisting list contains the basic block where to hoist and + // a list of instructions to be hoisted. 
+ typedef std::pair HoistingPointInfo; + typedef SmallVector HoistingPointList; + GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), AA(AA), MD(MD), MSSA(MSSA), MSSAUpdater(make_unique(MSSA)), - HoistingGeps(false), - HoistedCtr(0) - { } - - bool run(Function &F) { - VN.setDomTree(DT); - VN.setAliasAnalysis(AA); - VN.setMemDep(MD); - bool Res = false; - // Perform DFS Numbering of instructions. - unsigned BBI = 0; - for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { - DFSNumber[BB] = ++BBI; - unsigned I = 0; - for (auto &Inst : *BB) - DFSNumber[&Inst] = ++I; - } + HoistedCtr(0), HoistingGeps(false) { + clearVNTables(); + } - int ChainLength = 0; + void clearVNTables() { + II.clear(); + LI.clear(); + SI.clear(); + CI.clear(); + } - // FIXME: use lazy evaluation of VN to avoid the fix-point computation. - while (1) { - if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) - return Res; + // DomLevels maps from BB -> its depth from root. + // JEdges only contain the J edges as D edges are available in Dominator Tree. + // BBLevels maps each depth in the CFG to all the Basic Blocks at that level. + // DJ Graph is described in "Sreedhar, Vugranam C. Efficient program analysis + // using DJ graphs. McGill University, 1996". + void constructDJGraph(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels); + + // Return true if S1 is a subset of S2. + bool isSubset(const SmallSetBB &S1, const SmallSetBB &S2) { + if (S1.size() > S2.size()) + return false; + for (BasicBlock *BB : S1) { + if (!S2.count(BB)) + return false; + } + return true; + } - auto HoistStat = hoistExpressions(F); - if (HoistStat.first + HoistStat.second == 0) - return Res; + // DomLevels maps BB to its depth from root. + // JEdges only contain the J-edges as D-edges are available in Dominator Tree. + // BBLevels maps each depth in the CFG to all the BBs at that level. + // BILARDI, G. AND PINGALI, K. 2003. 
Algorithms for computing the static + // single assignment form. J. ACM 50, 3 (May), 375–425. + bool constructMergeSet(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels); - if (HoistStat.second > 0) - // To address a limitation of the current GVN, we need to rerun the - // hoisting after we hoisted loads or stores in order to be able to - // hoist all scalars dependent on the hoisted ld/st. - VN.clear(); + // Returns true if the \p Op is the last use at I. + // TODO: Find O(1) algorithm for this. + const Instruction *lastUser(const Instruction *I, const Value *Val) const; - Res = true; - } + // Returns true if the \p Op is live-out from \p BB. + bool isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const; - return Res; - } + bool run(Function &F); private: GVN::ValueTable VN; @@ -252,31 +380,28 @@ MemoryDependenceResults *MD; MemorySSA *MSSA; std::unique_ptr MSSAUpdater; - const bool HoistingGeps; DenseMap DFSNumber; BBSideEffectsSet BBSideEffects; + MergeSetT MergeSet; + DenseSet HoistBarrier; + InsnInfo II; + LoadInfo LI; + StoreInfo SI; + CallInfo CI; int HoistedCtr; + const bool HoistingGeps; + //unsigned NumFuncArgs; + //DenseMap Rank; enum InsKind { Unknown, Scalar, Load, Store }; - // Return true when there are exception handling in BB. - bool hasEH(const BasicBlock *BB) { - auto It = BBSideEffects.find(BB); - if (It != BBSideEffects.end()) - return It->second; - - if (BB->isEHPad() || BB->hasAddressTaken()) { - BBSideEffects[BB] = true; - return true; - } - - if (BB->getTerminator()->mayThrow()) { - BBSideEffects[BB] = true; - return true; - } - - BBSideEffects[BB] = false; - return false; + // Return true when I1 appears before I2 in the instructions of BB. 
+ bool firstInBB(const Instruction *I1, const Instruction *I2) const {
+ assert(I1->getParent() == I2->getParent());
+ unsigned I1DFS = DFSNumber.lookup(I1);
+ unsigned I2DFS = DFSNumber.lookup(I2);
+ assert(I1DFS && I2DFS);
+ return I1DFS < I2DFS;
}
// Return true when a successor of BB dominates A.
@@ -288,693 +413,1331 @@
return false;
}
+ bool hoistCandidate(const User *U) const {
+ if (!VN.exists(const_cast(U))) // Only for scalars
+ return false;
+ unsigned V = VN.lookup(const_cast(U));
+
+ // Multiple scalars with same VN have very high chance of being hoisted.
+ if (II.getVNTable().count({V, InvalidVN}) > 1)
+ return true;
+
+ return false;
+ }
+
+ // Return true when there is exception handling in BB.
+ bool hasEH(const BasicBlock *BB);
+
+ // Return true when there is exception handling or loads of memory Def
+ // between Def and NewPt. This function is only called for stores: Def is
+ // the MemoryDef of the store to be hoisted.
+ //
+ // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+ // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+ // initialized to -1 which is unlimited.
+ bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+ int &NBBsOnAllPaths);
+
+ // Return true when there is exception handling between HoistPt and BB.
+ // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+ // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+ // initialized to -1 which is unlimited.
+ bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
+ int &NBBsOnAllPaths);
+
// Return true when all paths from HoistBB to the end of the function pass
// through one of the blocks in WL.
bool hoistingFromAllPaths(const BasicBlock *HoistBB,
- SmallPtrSetImpl &WL) {
+ SmallPtrSetImpl &WL);
- // Copy WL as the loop will remove elements from it.
- SmallPtrSet WorkList(WL.begin(), WL.end());
+ // Return true when there are memory uses of Def in BB. 
+ bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB); - for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { - // There exists a path from HoistBB to the exit of the function if we are - // still iterating in DF traversal and we removed all instructions from - // the work list. - if (WorkList.empty()) - return false; + // Return true when it is safe to hoist scalar instructions from all blocks in + // WL to HoistBB. TODO: Hoisting scalars past a call only when the + // interference does not increase. + bool safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths); - const BasicBlock *BB = *It; - if (WorkList.erase(BB)) { - // Stop DFS traversal when BB is in the work list. - It.skipChildren(); - continue; - } + // Return true when it is safe to hoist a memory load or store U from OldPt + // to NewPt. + bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, + MemoryUseOrDef *MA, InsKind K, int &NBBsOnAllPaths); - // Check for end of function, calls that do not return, etc. - if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator())) - return false; + bool safeToHoistLdSt_0(const Instruction *NewHoistPt, + const Instruction *HoistPt, const Instruction *Insn, + MemoryUseOrDef *MA, GVNHoist::InsKind K, + int &NumBBsOnAllPaths, const BasicBlock *HoistPtBB, + const BasicBlock *NewHoistBB, const BasicBlock *InsnBB, + SmallPtrSetImpl &WL); - // When reaching the back-edge of a loop, there may be a path through the - // loop that does not pass through B or C before exiting the loop. - if (successorDominate(BB, HoistBB)) - return false; + // Return true when it is profitable to hoist I1. + bool profitableToHoist(Instruction *I) const; - // Increment DFS traversal when not skipping children. - ++It; - } + // Return true when it is profitable to hoist I1. 
+ bool profitableToSink(Instruction *I) const; + + // Partition InstructionsToHoist into a set of candidates which can share a + // common hoisting point. The partitions are collected in HPL. IsScalar is + // true when the instructions in InstructionsToHoist are scalars. IsLoad is + // true when the InstructionsToHoist are loads, false when they are stores. + void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + HoistingPointList &HPL, InsKind K); + + bool safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K); + + // Initialize HPL from Map. + void findHoistableInsn(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K); + + /*void getRanks(Function &F); + + void sortByRank(SmallVectorImpl &V, const VNtoInsns &Map) const;*/ + + // Find sinkable instructions. + void findSinkableInsn(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K); + + // Return true when all operands of Instr are available at insertion point + // HoistPt. When limiting the number of hoisted expressions, one could hoist + // a load without hoisting its access function. So before hoisting any + // expression, make sure that all its operands are available at insert point. + bool allOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) + return false; return true; } - /* Return true when I1 appears before I2 in the instructions of BB. */ - bool firstInBB(const Instruction *I1, const Instruction *I2) { - assert(I1->getParent() == I2->getParent()); - unsigned I1DFS = DFSNumber.lookup(I1); - unsigned I2DFS = DFSNumber.lookup(I2); - assert(I1DFS && I2DFS); - return I1DFS < I2DFS; + // Same as allOperandsAvailable with recursive check for GEP operands. + bool allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const; + + // Make all operands of the GEP available. 
+ void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const; + + // In the case Repl is a load or a store, we make all their GEPs + // available: GEPs are not hoisted by default to avoid the address + // computations to be hoisted without the associated load or store. + bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const; + + void setAlignment(Instruction *I, Instruction *Repl); + // Remove and rename all instructions other than Repl. + void removeAndReplace(const SmallVecInsn &InstructionsToHoist, + Instruction *Repl); + void removeAndReplace(Instruction *I, Instruction *Repl, MemoryAccess *NewMemAcc); + void removeMPhi(MemoryAccess *NewMemAcc); + std::pair hoist(HoistingPointList &HPL); + std::pair sink(HoistingPointList &HPL); + bool getVN(Instruction *I); + void barriersAndVNs(Function &F, bool Sinking); + // Hoist all expressions. Returns Number of scalars hoisted + // and number of non-scalars hoisted. + std::pair hoistExpressions(Function &F); + std::pair sinkExpressions(Function &F); +}; + +class GVNHoistLegacyPass : public FunctionPass { +public: + static char ID; + + GVNHoistLegacyPass() : FunctionPass(ID) { + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); } - // Return true when there are memory uses of Def in BB. 
- bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, - const BasicBlock *BB) { - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); - if (!Acc) + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; + auto &DT = getAnalysis().getDomTree(); + auto &AA = getAnalysis().getAAResults(); + auto &MD = getAnalysis().getMemDep(); + auto &MSSA = getAnalysis().getMSSA(); - Instruction *OldPt = Def->getMemoryInst(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *NewBB = NewPt->getParent(); - bool ReachedNewPt = false; + GVNHoist G(&DT, &AA, &MD, &MSSA); + return G.run(F); + } - for (const MemoryAccess &MA : *Acc) - if (const MemoryUse *MU = dyn_cast(&MA)) { - Instruction *Insn = MU->getMemoryInst(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } +}; - // Do not check whether MU aliases Def when MU occurs after OldPt. - if (BB == OldBB && firstInBB(OldPt, Insn)) - break; +bool GVNHoist::hasEH(const BasicBlock *BB) { + auto It = BBSideEffects.find(BB); + if (It != BBSideEffects.end()) + return It->second; - // Do not check whether MU aliases Def when MU occurs before NewPt. - if (BB == NewBB) { - if (!ReachedNewPt) { - if (firstInBB(Insn, NewPt)) - continue; - ReachedNewPt = true; - } - } - if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) - return true; - } + if (BB->isEHPad() || BB->hasAddressTaken()) { + BBSideEffects[BB] = true; + return true; + } - return false; + if (BB->getTerminator()->mayThrow()) { + BBSideEffects[BB] = true; + return true; } - // Return true when there are exception handling or loads of memory Def - // between Def and NewPt. This function is only called for stores: Def is - // the MemoryDef of the store to be hoisted. 
+ BBSideEffects[BB] = false; + return false; +} - // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and - // return true when the counter NBBsOnAllPaths reaces 0, except when it is - // initialized to -1 which is unlimited. - bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, - int &NBBsOnAllPaths) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = Def->getBlock(); - assert(DT->dominates(NewBB, OldBB) && "invalid path"); - assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && - "def does not dominate new hoisting point"); - - // Walk all basic blocks reachable in depth-first iteration on the inverse - // CFG from OldBB to NewBB. These blocks are all the blocks that may be - // executed between the execution of NewBB and OldBB. Hoisting an expression - // from OldBB into NewBB has to be safe on all execution paths. - for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { - if (*I == NewBB) { - // Stop traversal when reaching HoistPt. - I.skipChildren(); - continue; - } +bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, + int &NBBsOnAllPaths) { + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = Def->getBlock(); + assert(DT->dominates(NewBB, OldBB) && "invalid path"); + assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && + "def does not dominate new hoisting point"); + + // Walk all basic blocks reachable in depth-first iteration on the inverse + // CFG from OldBB to NewBB. These blocks are all the blocks that may be + // executed between the execution of NewBB and OldBB. Hoisting an expression + // from OldBB into NewBB has to be safe on all execution paths. + for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == NewBB) { + // Stop traversal when reaching HoistPt. + I.skipChildren(); + continue; + } - // Stop walk once the limit is reached. 
- if (NBBsOnAllPaths == 0) - return true; + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; - // Impossible to hoist with exceptions on the path. - if (hasEH(*I)) - return true; + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; - // Check that we do not move a store past loads. - if (hasMemoryUse(NewPt, Def, *I)) - return true; + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != OldBB) && HoistBarrier.count(BB)) + return true; - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; + // Check that we do not move a store past loads. + if (hasMemoryUse(NewPt, Def, BB)) + return true; - ++I; - } + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; - return false; + ++I; } - // Return true when there are exception handling between HoistPt and BB. - // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and - // return true when the counter NBBsOnAllPaths reaches 0, except when it is - // initialized to -1 which is unlimited. - bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB, - int &NBBsOnAllPaths) { - assert(DT->dominates(HoistPt, BB) && "Invalid path"); - - // Walk all basic blocks reachable in depth-first iteration on - // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the - // blocks that may be executed between the execution of NewHoistPt and - // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe - // on all execution paths. - for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) { - if (*I == HoistPt) { - // Stop traversal when reaching NewHoistPt. - I.skipChildren(); - continue; - } + return false; +} - // Stop walk once the limit is reached. 
- if (NBBsOnAllPaths == 0) - return true; +bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, + int &NBBsOnAllPaths) { + assert(DT->dominates(HoistPt, SrcBB) && "Invalid path"); + + // Walk all basic blocks reachable in depth-first iteration on + // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the + // blocks that may be executed between the execution of NewHoistPt and + // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe + // on all execution paths. + for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == HoistPt) { + // Stop traversal when reaching NewHoistPt. + I.skipChildren(); + continue; + } - // Impossible to hoist with exceptions on the path. - if (hasEH(*I)) - return true; + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; - ++I; - } + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != SrcBB) && HoistBarrier.count(BB)) + return true; - return false; + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; } - // Return true when it is safe to hoist a memory load or store U from OldPt - // to NewPt. - bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, - MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { + return false; +} - // In place hoisting is safe. 
- if (NewPt == OldPt) - return true; +bool GVNHoist::hoistingFromAllPaths(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *UBB = U->getBlock(); + // Copy WL as the loop will remove elements from it. + SmallPtrSet WorkList(WL.begin(), WL.end()); - // Check for dependences on the Memory SSA. - MemoryAccess *D = U->getDefiningAccess(); - BasicBlock *DBB = D->getBlock(); - if (DT->properlyDominates(NewBB, DBB)) - // Cannot move the load or store to NewBB above its definition in DBB. + for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { + // There exists a path from HoistBB to the exit of the function if we are + // still iterating in DF traversal and we removed all instructions from + // the work list. + if (WorkList.empty()) return false; - if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) - if (auto *UD = dyn_cast(D)) - if (firstInBB(NewPt, UD->getMemoryInst())) - // Cannot move the load or store to NewPt above its definition in D. - return false; + const BasicBlock *BB = *It; + if (WorkList.erase(BB)) { + // Stop DFS traversal when BB is in the work list. + It.skipChildren(); + continue; + } - // Check for unsafe hoistings due to side effects. - if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, dyn_cast(U), NBBsOnAllPaths)) - return false; - } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + // We reached the leaf Basic Block => not all paths have this instruction. + if (!BB->getTerminator()->getNumSuccessors()) + return false; + + // When reaching the back-edge of a loop, there may be a path through the + // loop that does not pass through B or C before exiting the loop. + if (successorDominate(BB, HoistBB)) return false; - if (UBB == NewBB) { - if (DT->properlyDominates(DBB, NewBB)) + // Increment DFS traversal when not skipping children. 
+ ++It; + } + + return true; +} + +bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB) { + const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); + if (!Acc) + return false; + + Instruction *OldPt = Def->getMemoryInst(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *NewBB = NewPt->getParent(); + bool ReachedNewPt = false; + + for (const MemoryAccess &MA : *Acc) + if (const MemoryUse *MU = dyn_cast(&MA)) { + Instruction *Insn = MU->getMemoryInst(); + + // Do not check whether MU aliases Def when MU occurs after OldPt. + if (BB == OldBB && firstInBB(OldPt, Insn)) + break; + + // Do not check whether MU aliases Def when MU occurs before NewPt. + if (BB == NewBB) { + if (!ReachedNewPt) { + if (firstInBB(Insn, NewPt)) + continue; + ReachedNewPt = true; + } + } + if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) return true; - assert(UBB == DBB); - assert(MSSA->locallyDominates(D, U)); } - // No side effects: it is safe to hoist. - return true; - } + return false; +} - // Return true when it is safe to hoist scalar instructions from all blocks in - // WL to HoistBB. - bool safeToHoistScalar(const BasicBlock *HoistBB, - SmallPtrSetImpl &WL, - int &NBBsOnAllPaths) { - // Check that the hoisted expression is needed on all paths. - if (!hoistingFromAllPaths(HoistBB, WL)) +bool GVNHoist::safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths) { + // Check that the hoisted expression is needed on all paths. + if (!hoistingFromAllPaths(HoistBB, WL)) + return false; + + for (const BasicBlock *BB : WL) + if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) return false; - for (const BasicBlock *BB : WL) - if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) - return false; + return true; +} +bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt, + const Instruction *OldPt, MemoryUseOrDef *MA, + GVNHoist::InsKind K, int &NBBsOnAllPaths) { + + // In place hoisting is safe. 
+ if (NewPt == OldPt) return true; - } - // Each element of a hoisting list contains the basic block where to hoist and - // a list of instructions to be hoisted. - typedef std::pair HoistingPointInfo; - typedef SmallVector HoistingPointList; + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *MABB = MA->getBlock(); - // Partition InstructionsToHoist into a set of candidates which can share a - // common hoisting point. The partitions are collected in HPL. IsScalar is - // true when the instructions in InstructionsToHoist are scalars. IsLoad is - // true when the InstructionsToHoist are loads, false when they are stores. - void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, - HoistingPointList &HPL, InsKind K) { - // No need to sort for two instructions. - if (InstructionsToHoist.size() > 2) { - SortByDFSIn Pred(DFSNumber); - std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); - } + // Check for dependences on the Memory SSA. + MemoryAccess *Def = MA->getDefiningAccess(); + BasicBlock *DefBB = Def->getBlock(); + if (DT->properlyDominates(NewBB, DefBB)) + // Cannot move the load or store to NewBB above its definition in DBB. + return false; - int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + if (NewBB == DefBB && !MSSA->isLiveOnEntryDef(Def)) + if (auto *UD = dyn_cast(Def)) + if (firstInBB(NewPt, UD->getMemoryInst())) + // Cannot move the load or store to NewPt above its definition in D. 
+ return false; - SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); - SmallVecImplInsn::iterator Start = II; - Instruction *HoistPt = *II; - BasicBlock *HoistBB = HoistPt->getParent(); - MemoryUseOrDef *UD; - if (K != InsKind::Scalar) - UD = MSSA->getMemoryAccess(HoistPt); - - for (++II; II != InstructionsToHoist.end(); ++II) { - Instruction *Insn = *II; - BasicBlock *BB = Insn->getParent(); - BasicBlock *NewHoistBB; - Instruction *NewHoistPt; - - if (BB == HoistBB) { // Both are in the same Basic Block. - NewHoistBB = HoistBB; - NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt; - } else { - // If the hoisting point contains one of the instructions, - // then hoist there, otherwise hoist before the terminator. - NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB); - if (NewHoistBB == BB) - NewHoistPt = Insn; - else if (NewHoistBB == HoistBB) - NewHoistPt = HoistPt; - else - NewHoistPt = NewHoistBB->getTerminator(); - } + // Check for unsafe hoistings due to side effects. + if (K == InsKind::Store) { + if (hasEHOrLoadsOnPath(NewPt, dyn_cast(MA), NBBsOnAllPaths)) + return false; + } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + return false; - SmallPtrSet WL; - WL.insert(HoistBB); - WL.insert(BB); + if (MABB == NewBB) { + if (DT->properlyDominates(DefBB, NewBB)) + return true; + assert(MABB == DefBB); + assert(MSSA->locallyDominates(Def, MA)); + } - if (K == InsKind::Scalar) { - if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths)) { - // Extend HoistPt to NewHoistPt. - HoistPt = NewHoistPt; - HoistBB = NewHoistBB; - continue; - } - } else { - // When NewBB already contains an instruction to be hoisted, the - // expression is needed on all paths. - // Check that the hoisted expression is needed on all paths: it is - // unsafe to hoist loads to a place where there may be a path not - // loading from the same address: for instance there may be a branch on - // which the address of the load may not be initialized. 
- if ((HoistBB == NewHoistBB || BB == NewHoistBB || - hoistingFromAllPaths(NewHoistBB, WL)) && - // Also check that it is safe to move the load or store from HoistPt - // to NewHoistPt, and from Insn to NewHoistPt. - safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NumBBsOnAllPaths) && - safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), - K, NumBBsOnAllPaths)) { - // Extend HoistPt to NewHoistPt. - HoistPt = NewHoistPt; - HoistBB = NewHoistBB; - continue; + // No side effects: it is safe to hoist. + return true; +} + + +bool GVNHoist::safeToHoistLdSt_0(const Instruction *NewHoistPt, + const Instruction *HoistPt, + const Instruction *Insn, MemoryUseOrDef *MA, + GVNHoist::InsKind K, int &NumBBsOnAllPaths, + const BasicBlock *HoistPtBB, + const BasicBlock *NewHoistBB, + const BasicBlock *InsnBB, + SmallPtrSetImpl &WL) { + return (HoistPtBB == NewHoistBB || InsnBB == NewHoistBB || + hoistingFromAllPaths(NewHoistBB, WL)) && + // Also check that it is safe to move the load or store from HoistPt + // to NewHoistPt, and from Insn to NewHoistPt. Note that HoistPt may + // not be the instruction to be hoisted, it is a transient placeholder + // to find the farthest hoisting point when >2 hoistable candidates + // can be hoisted to a common dominator. + safeToHoistLdSt(NewHoistPt, HoistPt, MA, K, NumBBsOnAllPaths) && + safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), K, + NumBBsOnAllPaths); +} + +bool GVNHoist::profitableToHoist(Instruction *I) const { + if (!CheckProfitability) + return true; + // For -O3/-O2 hoist only when the liveness decreases i.e., no more than + // one operand can be a use without kill. + // Store and Calls do not create a register def. + if (isa(I) || isa(I)) + return true; + + // If Op is a kill then it will not be live-out from its basic block + // but the reverse is not true. 
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + Value *Op = I->getOperand(op); + //if (isa(Op)) + // continue; + if (isLiveOutUsingMergeSet(I->getParent(), Op)) + return false; + // It is always profitable to hoist when the liveness does not increase, + // a Kill will compensate for the def created by this instruction. + const Instruction *LU = lastUser(I, Op); + if (LU == I) + return true; + else { + // We optimistically assume that if all the users of Op are hoistable + // candidates then it is profitable to hoist. + bool stillProfitable = true; + for (const User *U : Op->users()) { + if (!hoistCandidate(U)) { + stillProfitable = false; + break; } + DEBUG(dbgs() << "\nstill hoistable:" << *U); } + if (stillProfitable) + return true; + } + } + return false; +} + +bool GVNHoist::profitableToSink(Instruction *I) const { + // For -O3/-O2 sink only when the liveness decreases i.e., no more than + // one operand can be a kill. + // Store and Calls do not create a register def. + if (isa(I) || isa(I)) + return false; + + // If Op is a kill then it will not be live-out from its basic block + // but the reverse is not true. + unsigned NumKills = 0; + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + Value *Op = I->getOperand(op); + // It is always profitable to sink when the liveness does not increase. + if (isLiveOutUsingMergeSet(I->getParent(), Op)) + continue; + // A Kill will increase the liveness during a sink. + const Instruction *LU = lastUser(I, Op); + if (LU == I && ++NumKills > 1) + return false; + } + return true; +} + +void GVNHoist::partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + // No need to sort for two instructions. 
+ if (InstructionsToHoist.size() > 2) { + SortByDFSIn Pred(DFSNumber); + std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); + } - // At this point it is not safe to extend the current hoisting to - // NewHoistPt: save the hoisting list so far. - if (std::distance(Start, II) > 1) - HPL.push_back({HoistBB, SmallVecInsn(Start, II)}); - - // Start over from BB. - Start = II; - if (K != InsKind::Scalar) - UD = MSSA->getMemoryAccess(*Start); - HoistPt = Insn; - HoistBB = BB; - NumBBsOnAllPaths = MaxNumberOfBBSInPath; + int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + + SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); + SmallVecImplInsn::iterator Start = II; + Instruction *HoistPt = *II; + BasicBlock *HoistPtBB = HoistPt->getParent(); + MemoryUseOrDef *MA; + if (K != InsKind::Scalar) + MA = MSSA->getMemoryAccess(HoistPt); + + for (++II; II != InstructionsToHoist.end(); ++II) { + Instruction *Insn = *II; + BasicBlock *InsnBB = Insn->getParent(); + BasicBlock *NewHoistBB; + Instruction *NewHoistPt; + + if (InsnBB == HoistPtBB) { // Both are in the same Basic Block. + NewHoistBB = HoistPtBB; + NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt; + } else { + // If the hoisting point contains one of the instructions, + // then hoist there, otherwise hoist before the terminator. + NewHoistBB = DT->findNearestCommonDominator(HoistPtBB, InsnBB); + if (NewHoistBB == InsnBB) + NewHoistPt = Insn; + else if (NewHoistBB == HoistPtBB) + NewHoistPt = HoistPt; + else + NewHoistPt = NewHoistBB->getTerminator(); } - // Save the last partition. + SmallPtrSet WL; + WL.insert(HoistPtBB); + WL.insert(InsnBB); + + // When NewBB already contains an instruction to be hoisted, the + // expression is needed on all paths. 
+ // Check that the hoisted expression is needed on all paths: it is
+ // unsafe to hoist loads to a place where there may be a path not
+ // loading from the same address: for instance there may be a branch on
+ // which the address of the load may not be initialized.
+ if (K == InsKind::Scalar) {
+ if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths) &&
+ profitableToHoist(Insn)) {
+ // Extend HoistPt to NewHoistPt.
+ HoistPt = NewHoistPt;
+ HoistPtBB = NewHoistBB;
+ continue;
+ }
+ } else {
+ if (safeToHoistLdSt_0(NewHoistPt, HoistPt, Insn, MA, K, NumBBsOnAllPaths,
+ HoistPtBB, NewHoistBB, InsnBB, WL) &&
+ // Hoist loads when hoisting to pred BB even if liveness increases.
+ (profitableToHoist(Insn) || InsnBB->getSinglePredecessor() == NewHoistBB)) {
+ // Extend HoistPt to NewHoistPt.
+ HoistPt = NewHoistPt;
+ HoistPtBB = NewHoistBB;
+ continue;
+ }
+ }
+
+ // At this point it is not safe to extend the current hoisting to
+ // NewHoistPt: save the hoisting list so far.
 if (std::distance(Start, II) > 1)
-      HPL.push_back({HoistBB, SmallVecInsn(Start, II)});
+      HPL.push_back({HoistPtBB, SmallVecInsn(Start, II)});
+
+    // Start over from BB.
+    Start = II;
+    if (K != InsKind::Scalar)
+      MA = MSSA->getMemoryAccess(*Start);
+    HoistPt = Insn;
+    HoistPtBB = InsnBB;
+    NumBBsOnAllPaths = MaxNumberOfBBSInPath;
   }
 
-  // Initialize HPL from Map.
-  void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
-                              InsKind K) {
-    for (const auto &Entry : Map) {
-      if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
-        return;
-
-      const SmallVecInsn &V = Entry.second;
-      if (V.size() < 2)
-        continue;
+  // Save the last partition.
+  if (std::distance(Start, II) > 1)
+    HPL.push_back({HoistPtBB, SmallVecInsn(Start, II)});
+}
 
-      // Compute the insertion point and the list of expressions to be hoisted. 
- SmallVecInsn InstructionsToHoist; - for (auto I : V) - if (!hasEH(I->getParent())) - InstructionsToHoist.push_back(I); +// Returns true when the instruction does not result +// in a hazard with any other instruction till the end of BB. +bool GVNHoist::safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K) { + BasicBlock::iterator II(I); + auto IMA = MSSA->getMemoryAccess(I); + assert (IMA); + if (MemoryUseOrDef *I0UD = cast(IMA)) { + MemoryAccess *Def = I0UD->getDefiningAccess(); + + // Updating Memory PHI is tricky, bail out for now. + for (User *U : Def->users()) + if (isa(U)) + return false; + } - if (!InstructionsToHoist.empty()) - partitionCandidates(InstructionsToHoist, HPL, K); + while (++II != BB->end()) { // Skip I + auto IIMA = MSSA->getMemoryAccess(&*II); + // TODO: Bails out on any memory writes for now, improve for non-aliases. + if (IIMA) { + if (!isa(IIMA)) + return false; } } + return true; +} - // Return true when all operands of Instr are available at insertion point - // HoistPt. When limiting the number of hoisted expressions, one could hoist - // a load without hoisting its access function. So before hoisting any - // expression, make sure that all its operands are available at insert point. - bool allOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) - return false; +void GVNHoist::findHoistableInsn(const VNtoInsns &Map, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + for (const auto &Entry : Map) { + if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold) + return; - return true; + const SmallVecInsn &V = Entry.second; + if (V.size() < 2) + continue; + + // Compute the insertion point and the list of expressions to be hoisted. 
+ SmallVecInsn InstructionsToHoist; + for (auto I : V) { + // We don't need to check for hoist-barriers here because if + // I->getParent() has a barrier then I precedes the barrier. + if (!hasEH(I->getParent())) + InstructionsToHoist.push_back(I); + } + + if (!InstructionsToHoist.empty()) + partitionCandidates(InstructionsToHoist, HPL, K); } +} - // Same as allOperandsAvailable with recursive check for GEP operands. - bool allGepOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) { - if (const GetElementPtrInst *GepOp = - dyn_cast(Inst)) { - if (!allGepOperandsAvailable(GepOp, HoistPt)) - return false; - // Gep is available if all operands of GepOp are available. - } else { - // Gep is not available if it has operands other than GEPs that are - // defined in blocks not dominating HoistPt. - return false; - } - } - return true; +/*void GVNHoist::getRanks(Function &F) { + // Prefer undef to anything else + if (isa(V)) + return 0; + // Global is also a Constant. + if (isa(V)) + return 1; + else if (auto *A = dyn_cast(V)) + return 2 + A->getArgNo(); + if (Rank.count(V)) + return Rank[V]; + if (Instruction *I = dyn_cast(V)) { + unsigned R = 0; + for (Value *V1 : I->operand_values()) { + unsigned R1 = getRanks(V1); + R = std::max(R, R1); + } + R = 3 + NumFuncArgs + IRank; + Rank[V] = R + 1; + return R; } + // Return a really large number. + return std::numeric_limits::max()/2; +} - // Make all operands of the GEP available. 
- void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist, - Instruction *Gep) const { - assert(allGepOperandsAvailable(Gep, HoistPt) && - "GEP operands not available"); +void GVNHoist::sortByRank(SmallVectorImpl &V, const VNtoInsns &Map) const { + +}*/ + +void GVNHoist::findSinkableInsn(const VNtoInsns &Map, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + // Sort the VNs by rank, higher ranked should be sunk first. + //SmallVector SortedRank; + //sortByRank(SortedRank, Map); + for (const auto &Entry : Map) { + const SmallVecInsn &V = Entry.second; + if (V.size() < 2) + continue; + + // Only sink to common post-dom + SmallVecInsn InstructionsToSink; + for (Instruction *I : V) { + auto BB = I->getParent(); + // All the sinkable instructions are collected after any barrier in + // a basic block so no need to check for barrier BBs. + + // The instruction should have only one user i.e., PHI in the Succ + // TODO: Handle store, calls etc. without users + unsigned NumUsers = std::distance(I->user_begin(), I->user_end()); + if (NumUsers != 1) + continue; - Instruction *ClonedGep = Gep->clone(); - for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) - if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { + // The only user should be a PHI. + if(!isa(I->user_back())) + continue; - // Check whether the operand is already available. - if (DT->dominates(Op->getParent(), HoistPt)) + // Sink to single successor. + if (auto Succ = BB->getSingleSuccessor()) { + if (I->user_back()->getParent() != Succ) + continue; // PHI should be in the immediate successor. + // The successor should have exactly two predecessors. + unsigned NumPreds = std::distance(pred_begin(Succ), pred_end(Succ)); + if (NumPreds != 2) continue; - // As a GEP can refer to other GEPs, recursively make all the operands - // of this GEP available at HoistPt. 
- if (GetElementPtrInst *GepOp = dyn_cast(Op)) - makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); + // Sinkable instruction found, check for safety and profitability. + // As the instructions are from a basic block without barriers, we can + // sink this instruction as long as there is no hazard. + if (K != InsKind::Scalar && !safeToSinkToEnd(I, BB, K)) + continue; + if (!profitableToSink(I)) + continue; + + // We don't need to check for barriers here because the instruction + // will be sunk at the beginning of a basic block i.e., before any + // barrier, and I is after any barriers in I->getParent(). + if (!hasEH(Succ)) + InstructionsToSink.push_back(I); + } + } + + //assert(InstructionsToSink.size() < 3 && "Test case"); + // Sort V such that adjacent Instructions share common PDom. + if (InstructionsToSink.size() != 2) + continue; + //assert(InstructionsToSink.size() != 2 && "Test case"); + // There should be a unique PHI using I0 and I1 to be legally sinkable. + if (InstructionsToSink[0]->user_back() == InstructionsToSink[1]->user_back()) { + auto Succ0 = InstructionsToSink[0]->getParent()->getSingleSuccessor(); + auto Succ1 = InstructionsToSink[1]->getParent()->getSingleSuccessor(); + assert(Succ0 == Succ1); + HPL.push_back({Succ0, InstructionsToSink}); + } + } +} + +bool GVNHoist::allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) { + if (const GetElementPtrInst *GepOp = + dyn_cast(Inst)) { + if (!allGepOperandsAvailable(GepOp, HoistPt)) + return false; + // Gep is available if all operands of GepOp are available. + } else { + // Gep is not available if it has operands other than GEPs that are + // defined in blocks not dominating HoistPt. + return false; + } } + return true; +} - // Copy Gep and replace its uses in Repl with ClonedGep. 
- ClonedGep->insertBefore(HoistPt->getTerminator()); +void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const { + assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available"); - // Conservatively discard any optimization hints, they may differ on the - // other paths. - ClonedGep->dropUnknownNonDebugMetadata(); + Instruction *ClonedGep = Gep->clone(); + for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { - // If we have optimization hints which agree with each other along different - // paths, preserve them. - for (const Instruction *OtherInst : InstructionsToHoist) { - const GetElementPtrInst *OtherGep; - if (auto *OtherLd = dyn_cast(OtherInst)) - OtherGep = cast(OtherLd->getPointerOperand()); - else - OtherGep = cast( - cast(OtherInst)->getPointerOperand()); - ClonedGep->andIRFlags(OtherGep); + // Check whether the operand is already available. + if (DT->dominates(Op->getParent(), HoistPt)) + continue; + + // As a GEP can refer to other GEPs, recursively make all the operands + // of this GEP available at HoistPt. + if (GetElementPtrInst *GepOp = dyn_cast(Op)) + makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); } - // Replace uses of Gep with ClonedGep in Repl. - Repl->replaceUsesOfWith(Gep, ClonedGep); + // Copy Gep and replace its uses in Repl with ClonedGep. + ClonedGep->insertBefore(HoistPt->getTerminator()); + + // Conservatively discard any optimization hints, they may differ on the + // other paths. + ClonedGep->dropUnknownNonDebugMetadata(); + + // If we have optimization hints which agree with each other along different + // paths, preserve them. 
+ for (const Instruction *OtherInst : InstructionsToHoist) { + const GetElementPtrInst *OtherGep; + if (auto *OtherLd = dyn_cast(OtherInst)) + OtherGep = cast(OtherLd->getPointerOperand()); + else + OtherGep = cast( + cast(OtherInst)->getPointerOperand()); + ClonedGep->andIRFlags(OtherGep); } - // In the case Repl is a load or a store, we make all their GEPs - // available: GEPs are not hoisted by default to avoid the address - // computations to be hoisted without the associated load or store. - bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist) const { - // Check whether the GEP of a ld/st can be synthesized at HoistPt. - GetElementPtrInst *Gep = nullptr; - Instruction *Val = nullptr; - if (auto *Ld = dyn_cast(Repl)) { - Gep = dyn_cast(Ld->getPointerOperand()); - } else if (auto *St = dyn_cast(Repl)) { - Gep = dyn_cast(St->getPointerOperand()); - Val = dyn_cast(St->getValueOperand()); - // Check that the stored value is available. - if (Val) { - if (isa(Val)) { - // Check whether we can compute the GEP at HoistPt. - if (!allGepOperandsAvailable(Val, HoistPt)) - return false; - } else if (!DT->dominates(Val->getParent(), HoistPt)) + // Replace uses of Gep with ClonedGep in Repl. + Repl->replaceUsesOfWith(Gep, ClonedGep); +} + +bool GVNHoist::makeGepOperandsAvailable( + Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const { + // Check whether the GEP of a ld/st can be synthesized at HoistPt. + GetElementPtrInst *Gep = nullptr; + Instruction *Val = nullptr; + if (auto *Ld = dyn_cast(Repl)) { + Gep = dyn_cast(Ld->getPointerOperand()); + } else if (auto *St = dyn_cast(Repl)) { + Gep = dyn_cast(St->getPointerOperand()); + Val = dyn_cast(St->getValueOperand()); + // Check that the stored value is available. + if (Val) { + if (isa(Val)) { + // Check whether we can compute the GEP at HoistPt. 
+ if (!allGepOperandsAvailable(Val, HoistPt)) return false; - } + } else if (!DT->dominates(Val->getParent(), HoistPt)) + return false; } + } - // Check whether we can compute the Gep at HoistPt. - if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) - return false; + // Check whether we can compute the Gep at HoistPt. + if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) + return false; - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); - if (Val && isa(Val)) - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); + if (Val && isa(Val)) + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); - return true; + return true; +} + +void GVNHoist::setAlignment(Instruction *I, Instruction *Repl) { + if (auto *ReplacementLoad = dyn_cast(Repl)) { + ReplacementLoad->setAlignment( + std::min(ReplacementLoad->getAlignment(), + cast(I)->getAlignment())); + ++NumLoadsRemoved; + } else if (auto *ReplacementStore = dyn_cast(Repl)) { + ReplacementStore->setAlignment( + std::min(ReplacementStore->getAlignment(), + cast(I)->getAlignment())); + ++NumStoresRemoved; + } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { + ReplacementAlloca->setAlignment( + std::max(ReplacementAlloca->getAlignment(), + cast(I)->getAlignment())); + } else if (isa(Repl)) { + ++NumCallsRemoved; } +} - std::pair hoist(HoistingPointList &HPL) { - unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; - for (const HoistingPointInfo &HP : HPL) { - // Find out whether we already have one of the instructions in HoistPt, - // in which case we do not have to move it. 
- BasicBlock *HoistPt = HP.first; - const SmallVecInsn &InstructionsToHoist = HP.second; - Instruction *Repl = nullptr; - for (Instruction *I : InstructionsToHoist) - if (I->getParent() == HoistPt) - // If there are two instructions in HoistPt to be hoisted in place: - // update Repl to be the first one, such that we can rename the uses - // of the second based on the first. - if (!Repl || firstInBB(I, Repl)) - Repl = I; - - // Keep track of whether we moved the instruction so we know whether we - // should move the MemoryAccess. - bool MoveAccess = true; - if (Repl) { - // Repl is already in HoistPt: it remains in place. - assert(allOperandsAvailable(Repl, HoistPt) && - "instruction depends on operands that are not available"); - MoveAccess = false; - } else { - // When we do not find Repl in HoistPt, select the first in the list - // and move it to HoistPt. - Repl = InstructionsToHoist.front(); - - // We can move Repl in HoistPt only when all operands are available. - // The order in which hoistings are done may influence the availability - // of operands. - if (!allOperandsAvailable(Repl, HoistPt)) { - - // When HoistingGeps there is nothing more we can do to make the - // operands available: just continue. - if (HoistingGeps) - continue; +void GVNHoist::removeAndReplace(Instruction *I, Instruction *Repl, MemoryAccess *NewMemAcc) { + setAlignment(I, Repl); + if (NewMemAcc) { + // Update the uses of the old MSSA access with NewMemAcc. + MemoryAccess *OldMA = MSSA->getMemoryAccess(I); + OldMA->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMA); + } - // When not HoistingGeps we need to copy the GEPs. - if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) - continue; - } + Repl->andIRFlags(I); + combineKnownMetadata(Repl, I); + I->replaceAllUsesWith(Repl); + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I); + I->eraseFromParent(); +} - // Move the instruction at the end of HoistPt. 
- Instruction *Last = HoistPt->getTerminator(); - MD->removeInstruction(Repl); - Repl->moveBefore(Last); +void GVNHoist::removeAndReplace(const SmallVecInsn &InstructionsToHoist, + Instruction *Repl) { + MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); + // Remove and rename all other instructions. + for (Instruction *I : InstructionsToHoist) + if (I != Repl) { + removeAndReplace(I, Repl, NewMemAcc); + } +} - DFSNumber[Repl] = DFSNumber[Last]++; +void GVNHoist::removeMPhi(MemoryAccess *NewMemAcc) { + // Remove MemorySSA phi nodes with the same arguments. + SmallPtrSet UsePhis; + for (User *U : NewMemAcc->users()) + if (MemoryPhi *Phi = dyn_cast(U)) + UsePhis.insert(Phi); + + for (auto *Phi : UsePhis) { + auto In = Phi->incoming_values(); + if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + Phi->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(Phi); + } + } +} + +std::pair +GVNHoist::hoist(GVNHoist::HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; + for (const HoistingPointInfo &HP : HPL) { + // Find out whether we already have one of the instructions in HoistPt, + // in which case we do not have to move it. + BasicBlock *HoistPt = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + Instruction *Repl = nullptr; + for (Instruction *I : InstructionsToHoist) + if (I->getParent() == HoistPt) + // If there are two instructions in HoistPt to be hoisted in place: + // update Repl to be the first one, such that we can rename the uses + // of the second based on the first. + if (!Repl || firstInBB(I, Repl)) + Repl = I; + + // Keep track of whether we moved the instruction so we know whether we + // should move the MemoryAccess. + bool MoveAccess = true; + if (Repl) { + // Repl is already in HoistPt: it remains in place. 
+ assert(allOperandsAvailable(Repl, HoistPt) && + "instruction depends on operands that are not available"); + MoveAccess = false; + } else { + // When we do not find Repl in HoistPt, select the first in the list + // and move it to HoistPt. + Repl = InstructionsToHoist.front(); + + // We can move Repl in HoistPt only when all operands are available. + // The order in which hoistings are done may influence the availability + // of operands. + if (!allOperandsAvailable(Repl, HoistPt)) { + + // When HoistingGeps there is nothing more we can do to make the + // operands available: just continue. + if (HoistingGeps) + continue; + + // When not HoistingGeps we need to copy the GEPs. + if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) + continue; } - MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); - - if (MoveAccess) { - if (MemoryUseOrDef *OldMemAcc = - dyn_cast_or_null(NewMemAcc)) { - // The definition of this ld/st will not change: ld/st hoisting is - // legal when the ld/st is not moved past its current definition. - MemoryAccess *Def = OldMemAcc->getDefiningAccess(); - NewMemAcc = - MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End); - OldMemAcc->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMemAcc); - } + // Move the instruction at the end of HoistPt. + Instruction *Last = HoistPt->getTerminator(); + MD->removeInstruction(Repl); + Repl->moveBefore(Last); + + DFSNumber[Repl] = DFSNumber[Last]++; + } + + MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); + + if (MoveAccess) { + if (MemoryUseOrDef *OldMemAcc = + dyn_cast_or_null(NewMemAcc)) { + // The definition of this ld/st will not change: ld/st hoisting is + // legal when the ld/st is not moved past its current definition. 
+ MemoryAccess *Def = OldMemAcc->getDefiningAccess(); + NewMemAcc = MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, + MemorySSA::End); + OldMemAcc->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMemAcc); } + } - if (isa(Repl)) - ++NL; - else if (isa(Repl)) - ++NS; - else if (isa(Repl)) - ++NC; - else // Scalar - ++NI; - - // Remove and rename all other instructions. - for (Instruction *I : InstructionsToHoist) - if (I != Repl) { - ++NR; - if (auto *ReplacementLoad = dyn_cast(Repl)) { - ReplacementLoad->setAlignment( - std::min(ReplacementLoad->getAlignment(), - cast(I)->getAlignment())); - ++NumLoadsRemoved; - } else if (auto *ReplacementStore = dyn_cast(Repl)) { - ReplacementStore->setAlignment( - std::min(ReplacementStore->getAlignment(), - cast(I)->getAlignment())); - ++NumStoresRemoved; - } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { - ReplacementAlloca->setAlignment( - std::max(ReplacementAlloca->getAlignment(), - cast(I)->getAlignment())); - } else if (isa(Repl)) { - ++NumCallsRemoved; - } + updateLocalStats(Repl, NI, NL, NS, NC); + removeAndReplace(InstructionsToHoist, Repl); + // All except one i.e., Repl are removed. + NR += InstructionsToHoist.size() -1; - if (NewMemAcc) { - // Update the uses of the old MSSA access with NewMemAcc. - MemoryAccess *OldMA = MSSA->getMemoryAccess(I); - OldMA->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMA); - } + if (NewMemAcc) + removeMPhi(NewMemAcc); + } - Repl->andIRFlags(I); - combineKnownMetadata(Repl, I); - I->replaceAllUsesWith(Repl); - // Also invalidate the Alias Analysis cache. - MD->removeInstruction(I); - I->eraseFromParent(); - } + updateGlobalStats(NI, NL, NS, NC, NR); + return {NI, NL + NC + NS}; +} - // Remove MemorySSA phi nodes with the same arguments. 
- if (NewMemAcc) {
-      SmallPtrSet<MemoryPhi *, 4> UsePhis;
-      for (User *U : NewMemAcc->users())
-        if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
-          UsePhis.insert(Phi);
-
-      for (auto *Phi : UsePhis) {
-        auto In = Phi->incoming_values();
-        if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
-          Phi->replaceAllUsesWith(NewMemAcc);
-          MSSAUpdater->removeMemoryAccess(Phi);
-        }
+// Update MemorySSA if mem-refs are sunk.
+std::pair<unsigned, unsigned>
+GVNHoist::sink(GVNHoist::HoistingPointList &HPL) {
+  unsigned NI = 0, NL = 0, NS = 0, NC = 0;
+  for (const HoistingPointInfo &HP : HPL) {
+    BasicBlock *SinkPt = HP.first;
+    const SmallVecInsn &InstructionsToHoist = HP.second;
+    assert(InstructionsToHoist.size() == 2);
+    Instruction* I0 = InstructionsToHoist[0];
+    Instruction* I1 = InstructionsToHoist[1];
+
+    // We need to check availability for only I0 as I1 will be removed. TODO:
+    // Additionally, we can still sink if the operand not available is also
+    // getting sunk with available operands.
+    if (!allOperandsAvailable(I0, SinkPt))
+      continue;
+    // Keep I0 and remove I1 and PN
+    PHINode *PN = cast<PHINode>(I0->user_back());
+    MemoryAccess *I0MemAccess = MSSA->getMemoryAccess(I0);
+    DEBUG(dbgs() << "\nSinking:" << *I0 << *I1 << *PN);
+
+    if (I0MemAccess) {
+      // This MSSA update is only for mem-refs with one use in a PHI in Succ BB. 
+ MemoryUseOrDef *I0UD = cast(I0MemAccess); + //MSSAUpdater->moveToPlace(I0UD, PN->getParent(), MemorySSA::Beginning); + MemoryAccess *Def = I0UD->getDefiningAccess(); + + assert(none_of(Def->users(), [](User *U) { return isa(U); })); + /*SmallPtrSet UsePhis; + for (User *U : Def->users()) + if (MemoryPhi *Phi = dyn_cast(U)) + UsePhis.insert(Phi);*/ + + MemoryAccess *NewMemAcc = MSSAUpdater->createMemoryAccessInBB(I0, Def, SinkPt, + MemorySSA::Beginning); + + MemoryAccess *I1MemAccess = MSSA->getMemoryAccess(I1); + I0MemAccess->replaceAllUsesWith(NewMemAcc); + I1MemAccess->replaceAllUsesWith(NewMemAcc); + + MSSAUpdater->removeMemoryAccess(I0MemAccess); + MSSAUpdater->removeMemoryAccess(I1MemAccess); + + + /*for (auto *Phi : UsePhis) { + auto In = Phi->incoming_values(); + if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + Phi->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(Phi); } - } + }*/ } - NumHoisted += NL + NS + NC + NI; - NumRemoved += NR; - NumLoadsHoisted += NL; - NumStoresHoisted += NS; - NumCallsHoisted += NC; - return {NI, NL + NC + NS}; + setAlignment(I1, I0); + I0->andIRFlags(I1); + combineKnownMetadata(I0, I1); + // I1->replaceAllUsesWith(I0); Not required because I1 used once in PHI. + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I1); + MD->removeInstruction(PN); + PN->replaceAllUsesWith(I0); + // FIXME: Update Memory SSA + // Update the uses of the old MSSA access with I0. + // Remove MSSA for I1 + // Remove MSSA for PN, and replace with I0 + // Combine metadata of I0 and I1 + // Memory PHI? + //assert(0); + bool found = false; + auto It = SinkPt->begin(); + while (It != SinkPt->end()) { // Find the last PHI to insert I0 + if (!isa(It)) + break; + if (PN == &*It) + found = true; + ++It; + } + assert(found && "PHI replaced in wrong BB"); + I0->moveBefore(*SinkPt, It); + PN->eraseFromParent(); + I1->eraseFromParent(); + + NumSunk++; } + return {NI, NL + NC + NS}; +} - // Hoist all expressions. 
Returns Number of scalars hoisted - // and number of non-scalars hoisted. - std::pair hoistExpressions(Function &F) { - InsnInfo II; - LoadInfo LI; - StoreInfo SI; - CallInfo CI; - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - int InstructionNb = 0; - for (Instruction &I1 : *BB) { - // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting - // deeper may increase the register pressure and compilation time. - if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) - break; +bool GVNHoist::getVN(Instruction *I) { + // Do not value number terminator instructions. + if (isa(I)) + return false; - // Do not value number terminator instructions. - if (isa(&I1)) - break; + if (auto *Load = dyn_cast(I)) + LI.insert(Load, VN); + else if (auto *Store = dyn_cast(I)) + SI.insert(Store, VN); + else if (auto *Call = dyn_cast(I)) { + if (auto *Intr = dyn_cast(Call)) { + if (isa(Intr) || + Intr->getIntrinsicID() == Intrinsic::assume) + return true; + } + if (Call->mayHaveSideEffects()) + return false; - if (auto *Load = dyn_cast(&I1)) - LI.insert(Load, VN); - else if (auto *Store = dyn_cast(&I1)) - SI.insert(Store, VN); - else if (auto *Call = dyn_cast(&I1)) { - if (auto *Intr = dyn_cast(Call)) { - if (isa(Intr) || - Intr->getIntrinsicID() == Intrinsic::assume) - continue; - } - if (Call->mayHaveSideEffects()) - break; + if (Call->isConvergent()) + return false; + + CI.insert(Call, VN); + } else if (HoistingGeps || !isa(I)) + // Do not hoist scalars past calls that may write to memory because + // that could result in spills later. geps are handled separately. + // TODO: We can relax this for targets like AArch64 as they have more + // registers than X86. 
+ II.insert(I, VN); + + return true; +} + +void GVNHoist::barriersAndVNs(Function &F, bool Sinking) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + int InstructionNb = 0; + bool BarrierFound = false; + SmallVector, 4> CurrentBBVNs; + for (Instruction &I1 : *BB) { + // If I1 cannot guarantee progress, subsequent instructions + // in BB cannot be hoisted anyways. + // TODO: For sink operation anything after hoist barrier is okay to sink + // but nothing before the hoist barrier. + if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) { + //NumBarriers++; + HoistBarrier.insert(BB); + BarrierFound = true; + break; + } + + // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting + // deeper may increase the register pressure and compilation time. + if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) + break; - if (Call->isConvergent()) + if (!getVN(&I1)) + break; + } + if (Sinking && BarrierFound) { + // TODO: clear VNs for this basicblock. + // Instructions after the last barriers can be sunk. + for (BasicBlock::reverse_iterator It = BB->rbegin(), Ie = BB->rend(); + It != Ie; ++It) { + if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) + break; + if (!getVN(&*It)) break; + //assert(0); + } + } + } +} + +std::pair GVNHoist::hoistExpressions(Function &F) { + barriersAndVNs(F, false); + HoistingPointList HPL; + findHoistableInsn(II.getVNTable(), HPL, InsKind::Scalar); + findHoistableInsn(LI.getVNTable(), HPL, InsKind::Load); + findHoistableInsn(SI.getVNTable(), HPL, InsKind::Store); + findHoistableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar); + findHoistableInsn(CI.getLoadVNTable(), HPL, InsKind::Load); + findHoistableInsn(CI.getStoreVNTable(), HPL, InsKind::Store); + return hoist(HPL); +} + +std::pair GVNHoist::sinkExpressions(Function &F) { + barriersAndVNs(F, false); // First clear VNs before barrier in that BB. see TODO then set it to true. 
+ HoistingPointList HPL; + findSinkableInsn(II.getVNTable(), HPL, InsKind::Scalar); + findSinkableInsn(LI.getVNTable(), HPL, InsKind::Load); + findSinkableInsn(SI.getVNTable(), HPL, InsKind::Store); + findSinkableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar); + findSinkableInsn(CI.getLoadVNTable(), HPL, InsKind::Load); + findSinkableInsn(CI.getStoreVNTable(), HPL, InsKind::Store); + return sink(HPL); +} + +void GVNHoist::constructDJGraph(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels) { + for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); + DFI != DFE; ++DFI) { + // Since getPathLength is inclusive of both the terminal nodes + // i.e., Entry and *DFI so decrease by 1. + unsigned Depth = DFI.getPathLength() - 1; + BasicBlock *BB = (*DFI)->getBlock(); + DomLevels[BB] = Depth; + BBLevels[Depth].push_back(BB); + for (BasicBlock *Succ : BB->getTerminator()->successors()) + if (!DT->properlyDominates(BB, Succ)) { + JEdges.insert(std::make_pair(BB, Succ)); + } + } +} - CI.insert(Call, VN); - } else if (HoistingGeps || !isa(&I1)) - // Do not hoist scalars past calls that may write to memory because - // that could result in spills later. geps are handled separately. - // TODO: We can relax this for targets like AArch64 as they have more - // registers than X86. - II.insert(&I1, VN); +bool GVNHoist::constructMergeSet(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels) { + bool Repeat = false; + DenseSet VisJEdges; // Visited J Edges. + unsigned PrevLev = 0; + for (std::pair &P : BBLevels) { + assert(PrevLev <= P.first); + PrevLev = P.first; + for (BasicBlock *CurrBB : P.second) { + for (auto PB = pred_begin(CurrBB), PE = pred_end(CurrBB); PB != PE; + ++PB) { + EdgeT Edge(*PB, CurrBB); // For all incoming edges to CurrBB. 
+ if (JEdges.count(Edge) && !VisJEdges.count(Edge)) { + VisJEdges.insert(Edge); // Visit + BasicBlock *Src = Edge.first; + BasicBlock *Dst = Edge.second; + BasicBlock *INode = nullptr; + MergeSet[Dst].insert(Dst); // The target of JEdge. + while (DomLevels[Src] >= DomLevels[Dst]) { // A backedge. + DEBUG(dbgs() << "\nVisiting: " << Src->getName() << " -> " + << Dst->getName()); + // Merge (tmp) = Merge (tmp) U Merge (tnode) U { tnode } + // MergeSet(tnode) contains tnode. + MergeSet[Src].insert(MergeSet[Dst].begin(), MergeSet[Dst].end()); + INode = Src; + DEBUG(dbgs() << "IDom of " << Src->getName() << " is "); + Src = DT->getIDom(Src); + DEBUG(dbgs() << Src->getName()); + } + for (auto PINode = pred_begin(INode), PENode = pred_end(INode); + PINode != PENode; ++PINode) { // INode is an ancestor of SNode. + EdgeT Edge(*PINode, INode); + if (VisJEdges.count(Edge)) { + assert(JEdges.count(Edge)); + BasicBlock *SNode = *PINode; + // Check inconsistency, MergeSet[Dest] subset of MergeSet[Src] + if (!isSubset(MergeSet[INode], MergeSet[SNode])) + Repeat = true; + } + } + } } } + } + return Repeat; +} - HoistingPointList HPL; - computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); - computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); - computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); - computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); - return hoist(HPL); +const Instruction *GVNHoist::lastUser(const Instruction *I, + const Value *Val) const { + // TODO: Make isLiveOutUsingMergeSet take const parameters. 
+ assert(!isLiveOutUsingMergeSet(const_cast(I->getParent()), + const_cast(Val))); + const BasicBlock *BB = I->getParent(); + BasicBlock::const_iterator BI(I), BE = BB->end(); + unsigned ICount = std::distance(BI, BE); + if (Val->getNumUses() <= ICount) { // Iterate on uses + for (const User *U : Val->users()) { + const Instruction *UserI = cast(U); + if (UserI != I && UserI->getParent() == BB) { + if (firstInBB(I, UserI)) // I precedes another Use => not a kill. + return UserI; + } + } + return I; } -}; + // else Iterate on Instructions + for (++BI; BI != BE; ++BI) { + for (unsigned i = 0; i < BI->getNumOperands(); ++i) + if (BI->getOperandUse(i).get() == Val) + return &*BI; + } + return I; +} -class GVNHoistLegacyPass : public FunctionPass { -public: - static char ID; +bool GVNHoist::isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const { + assert(BB); + const BasicBlock *ValDefBB = nullptr; // BasicBlock defining Val + if (Instruction *I = dyn_cast(Val)) + ValDefBB = I->getParent(); + // FIXME! + // We are assuming when DefBB is not defined then the value is a parameter. + + // Case when Val is defined in BB, if any of the use is outside BB (DefBB) + // then it must be live-out. 
+ if (ValDefBB == BB) + for (User *U : Val->users()) { + if (cast(U)->getParent() != BB) + return true; + } - GVNHoistLegacyPass() : FunctionPass(ID) { - initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + // Mr(n) = M(n) U {n}; Create a new set from the merge set + // Ms(n) = Ms(n) U Mr(w); For each successor w of BB + SmallSetBB Ms; // Ms = null-set + for (BasicBlock *Succ : BB->getTerminator()->successors()) { + Ms.insert(Succ); // Mr(Succ) = Succ U M(Succ) + for (BasicBlock *BB : MergeSet.lookup(Succ)) + Ms.insert(BB); // M(Succ) } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - auto &DT = getAnalysis().getDomTree(); - auto &AA = getAnalysis().getAAResults(); - auto &MD = getAnalysis().getMemDep(); - auto &MSSA = getAnalysis().getMSSA(); + // Iterate over all the uses of Val, if any of its users is in the mergeset + // of \p BB then Val is LiveOut from BB. + for (User *U : Val->users()) { + BasicBlock *UserDefBB = nullptr; + if (Instruction *I = dyn_cast(U)) + UserDefBB = I->getParent(); + else // Assuming live-out conservatively, the user can be a global + // TODO: maybe return false is okay?? + return true; //llvm_unreachable("User is not an instruction."); + while (UserDefBB && (UserDefBB != ValDefBB)) { + if (Ms.count(UserDefBB)) // if t /\ Ms(n) then return true; + return true; + UserDefBB = DT->getIDom(UserDefBB); + } + } + return false; +} - GVNHoist G(&DT, &AA, &MD, &MSSA); - return G.run(F); +bool GVNHoist::run(Function &F) { + //NumFuncArgs = F.arg_size(); + DT->updateDFSNumbers(); + DomLevelsT DomLevels; + DenseSet JEdges; + BBLevelT BBLevels; + constructDJGraph(DomLevels, JEdges, BBLevels); + // printBBLevels(BBLevels); + DEBUG(printJEdges(JEdges)); + while (constructMergeSet(DomLevels, JEdges, BBLevels)) + ; + DEBUG(printMergeSet(MergeSet)); + VN.setDomTree(DT); + VN.setAliasAnalysis(AA); + VN.setMemDep(MD); + bool Res = false; + // Perform DFS Numbering of instructions. 
+ unsigned BBI = 0; + for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { + DFSNumber[BB] = ++BBI; + unsigned I = 0; + for (auto &Inst : *BB) + DFSNumber[&Inst] = ++I; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); + int ChainLength = 0; + + // FIXME: use lazy evaluation of VN to avoid the fix-point computation. + while (1) { + if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) + return Res; + clearVNTables(); + auto HoistStat = hoistExpressions(F); + if (HoistStat.first + HoistStat.second == 0) + break; + + if (HoistStat.second > 0) + // To address a limitation of the current GVN, we need to rerun the + // hoisting after we hoisted loads or stores in order to be able to + // hoist all scalars dependent on the hoisted ld/st. + VN.clear(); + + Res = true; } -}; -} // namespace + + clearVNTables(); + sinkExpressions(F); + return Res; +} + +} // namespace llvm PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); Index: llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll @@ -0,0 +1,62 @@ +; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; Testcase taken from: +; Das, Dibyendu, B. Dupont De Dinechin, and Ramakrishna Upadrasta. "Efficient +; liveness computation using merge sets and dj-graphs." ACM Transactions on +; Architecture and Code Optimization (TACO) 8.4 (2012): 27. 
+ +; check the J edges +; CHECK: Found a JEdge: bb4 -> bb5 +; CHECK: Found a JEdge: bb5 -> bb6 +; CHECK: Found a JEdge: bb6 -> bb5 +; CHECK: Found a JEdge: bb7 -> bb2 +; CHECK: Found a JEdge: bb9 -> bb6 +; CHECK: Found a JEdge: bb10 -> bb8 + +; check the mergesets +; CHECK: MergeSet of: bb2: bb2, +; CHECK: MergeSet of: bb3: bb2, +; CHECK: MergeSet of: bb4: bb2, bb5, bb6, +; CHECK: MergeSet of: bb8: bb2, bb8, bb5, bb6, +; CHECK: MergeSet of: bb5: bb2, bb5, bb6, +; CHECK: MergeSet of: bb6: bb2, bb5, bb6, +; CHECK: MergeSet of: bb7: bb2, +; CHECK: MergeSet of: bb9: bb2, bb8, bb5, bb6, +; CHECK: MergeSet of: bb10: bb2, bb8, bb5, bb6, + + +define void @irreducible_loop() { +bb1: +br label %bb2 + +bb2: + br i1 undef, label %bb3, label %bb11 + +bb3: + br i1 undef, label %bb4, label %bb8 + +bb4: + br label %bb5 + +bb5: + br label %bb6 + +bb6: + br i1 undef, label %bb7, label %bb5 + +bb7: + br label %bb2 + +bb8: + br label %bb9 + +bb9: + br i1 undef, label %bb6, label %bb10 + +bb10: + br label %bb8 + +bb11: + ret void +} \ No newline at end of file Index: llvm/test/Transforms/GVNHoist/dj-edge-detect.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/dj-edge-detect.ll @@ -0,0 +1,107 @@ +; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; check the J edges +; CHECK: Found a JEdge: bb2 -> bb3 +; CHECK: Found a JEdge: bb18 -> bb6 +; CHECK: Found a JEdge: bb15 -> bb18 +; CHECK: Found a JEdge: bb8 -> bb18 +; CHECK: Found a JEdge: bb10 -> bb18 +; CHECK: Found a JEdge: bb13 -> bb18 + + +; check the mergesets +; CHECK: MergeSet of: bb3: bb3, +; CHECK: MergeSet of: bb2: bb3, +; CHECK: MergeSet of: bb6: bb3, bb6, +; CHECK: MergeSet of: bb18: bb3, bb6, bb18, +; CHECK: MergeSet of: bb15: bb3, bb6, bb18, +; CHECK: MergeSet of: bb8: bb3, bb6, bb18, +; CHECK: MergeSet of: bb10: bb3, bb6, bb18, +; CHECK: MergeSet of: bb13: bb3, bb6, bb18, + +%0 = type { %1*, %8* } +%1 = type { %2*, %3* } +%2 = 
type { i32, i32, i32, i32, i32*, i8*, i32 } +%3 = type { %4, i32, i32, %7*, i8, i8, i8, i8, i8 } +%4 = type { i8, i32, i8*, %5 } +%5 = type { %6 } +%6 = type { i16 } +%7 = type { i8*, i8*, i8*, i8*, i8* } +%8 = type <{ i8*, i8*, i32, %9, %17*, %19* }> +%9 = type { %10*, i8*, i8*, i8* (i8*)*, i32* (i8*)*, i8*, %15, i32, i32, i8*, %16*, i32, i8, i8*, i8, i8* } +%10 = type { i8*, %11*, i8*, %12, %13*, i8*, i32 (i8*)*, i32, i8, %14 } +%11 = type { i8*, i32 (i8*, %12*, i8*)*, i32 (i8*)*, i32 (i8*, i32)*, %16* (i8*)*, i32 (i8*)*, i32 (i8*)*, i32 (...)*, i32 (...)*, i32 (...)*, void (i8*, i32)*, void (i8*, i32, i8*, i32)*, i32 (i8*, i32, i8*)* } +%12 = type { i8*, i32 (i8*, i32, i8*, i32, i8)*, i32 (i8*, i32, i8*)*, void (i8*, i8*, i32)*, i32 (i8*)*, i32 (i8*, i32, i32)* } +%13 = type { i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)* } +%14 = type { i32, i32, i8, i8, i8 } +%15 = type { i32, i32, i32 } +%16 = type { i16, i16, i32, i32 } +%17 = type { i8*, i8*, i16 (i8*, i8*, i16)*, i32 (i8*, i8, i32)*, i32 (%16*, i32)*, %18, i32, i32 } +%18 = type { i8*, i8* (i8*, i8*, i32)* } +%19 = type { %20, i32, i32 } +%20 = type { i32, i32, [8 x i8], i8, i8, i16 } + +@global = external hidden unnamed_addr constant [10 x i8], align 1 + +define void @ham(i8* nocapture readnone %arg) local_unnamed_addr { +bb: + %tmp = alloca %0*, align 8 + %tmp1 = bitcast %0** %tmp to i8* + br label %bb3 + +bb2: ; preds = %bb6 + br label %bb3 + +bb3: ; preds = %bb2, %bb + %tmp4 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp5 = call i32 @quux(i8* %tmp1) + br label %bb6 + +bb6: ; preds = %bb18, %bb3 + %tmp7 = phi i32 [ %tmp5, %bb3 ], [ %tmp19, %bb18 ] + switch i32 %tmp7, label %bb15 [ + i32 0, label %bb2 + i32 35, label %bb8 + i32 11, label %bb10 + i32 12, label %bb13 + ] + +bb8: ; preds = %bb6 + %tmp9 = load %0*, %0** %tmp, align 8, !tbaa !0 + br label %bb18 + +bb10: ; preds = %bb6 + %tmp11 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp12 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb13: ; preds = %bb6 
+ %tmp14 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb15: ; preds = %bb6 + %tmp16 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp17 = call i32 @quux(i8* null) + br label %bb18 + +bb18: ; preds = %bb15, %bb13, %bb10, %bb8 + %tmp19 = phi i32 [ %tmp17, %bb15 ], [ %tmp14, %bb13 ], [ %tmp12, %bb10 ], [ 0, %bb8 ] + br label %bb6 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +declare i32 @zot(%0**) + +declare i32 @quux(i8*) + +declare i32 @wombat(i8* nocapture readonly, ...) + +attributes #0 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"any pointer", !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} Index: llvm/test/Transforms/GVNHoist/hoist-mssa.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-mssa.ll +++ llvm/test/Transforms/GVNHoist/hoist-mssa.ll @@ -1,7 +1,9 @@ ; RUN: opt -S -gvn-hoist < %s | FileCheck %s -; Check that store hoisting works: there should be only one store left. -; CHECK-LABEL: @getopt +; Check that store hoisting works: there should be only one store hoisted +; because load cannot be hoisted as it is not profitable. +; CHECK-LABEL: @getopt( +; CHECK: store i32 ; CHECK: store i32 ; CHECK-NOT: store i32 @@ -47,6 +49,51 @@ ret void } +; Check that store hoisting works: there should be only one store left because +; load should also be hoisted in this case. 
+; CHECK-LABEL: @getopt_hoist_load( +; CHECK: store i32 +; CHECK-NOT: store i32 +define void @getopt_hoist_load(i32* %p1) { +bb: + br label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb13 + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb9 + +bb4: ; preds = %bb3 + %tmp = load i32, i32* @optind, align 4 + br i1 undef, label %bb5, label %bb7 + +bb5: ; preds = %bb4 + %tmp6 = add nsw i32 %tmp, 1 + store i32 %tmp6, i32* %p1, align 4 + br label %bb12 + +bb7: ; preds = %bb4 + %tmp8 = add nsw i32 %tmp, 1 + store i32 %tmp8, i32* %p1, align 4 + br label %bb13 + +bb9: ; preds = %bb3 + %tmp10 = load i32, i32* @optind, align 4 + %tmp11 = add nsw i32 %tmp10, 1 + store i32 %tmp11, i32* %p1, align 4 + br label %bb12 + +bb12: ; preds = %bb9, %bb5 + br label %bb13 + +bb13: ; preds = %bb12, %bb7, %bb2 + ret void +} + @GlobalVar = internal global float 1.000000e+00 ; Check that we hoist stores and remove the MSSA phi node. Index: llvm/test/Transforms/GVNHoist/hoist-very-busy.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-very-busy.ll +++ llvm/test/Transforms/GVNHoist/hoist-very-busy.ll @@ -32,3 +32,24 @@ declare void @longjmp(%struct.__jmp_buf_tag*, i32) #0 attributes #0 = { noreturn nounwind } + +; Check that the call and fcmp are hoisted. +; CHECK-LABEL: define void @fun( +; CHECK: store +; CHECK-NOT: store + +define void @fun() { +entry: + br label %if.then + +if.then: ; preds = %entry + br i1 undef, label %sw0, label %sw1 + +sw0: + store i32 1, i32* @G + unreachable + +sw1: + store i32 1, i32* @G + ret void +}