Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -682,8 +682,11 @@
 #ifndef NDEBUG
 static const Function *getParent(const Value *V) {
-  if (const Instruction *inst = dyn_cast<Instruction>(V))
+  if (const Instruction *inst = dyn_cast<Instruction>(V)) {
+    if (!inst->getParent())
+      return nullptr;
     return inst->getParent()->getParent();
+  }
 
   if (const Argument *arg = dyn_cast<Argument>(V))
     return arg->getParent();
Index: lib/Transforms/Scalar/NewGVN.cpp
===================================================================
--- lib/Transforms/Scalar/NewGVN.cpp
+++ lib/Transforms/Scalar/NewGVN.cpp
@@ -34,6 +34,14 @@
 /// performing symbolic evaluation, forward propagation, and simplification of
 /// operations based on the value numbers deduced so far.
 ///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai.  The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
+///
 /// We also do not perform elimination by using any published algorithm.  All
 /// published algorithms are O(Instructions). Instead, we use a technique that
 /// is O(number of operations with the same value number), enabling us to skip
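To make the phi-of-ops / op-of-phis equivalence concrete, here is a small standalone C++ illustration (a sketch, not part of the patch; the ternary stands in for a two-predecessor CFG join). Both functions always compute the same value, which is exactly the equivalence the pass detects:

    #include <cassert>

    // Op of phis: the add consumes two values merged at the join.
    int opOfPhis(bool fromLeft, int a, int b, int c, int d) {
      int p = fromLeft ? a : c; // p = phi(a, c)
      int q = fromLeft ? b : d; // q = phi(b, d)
      return p + q;             // op of phis
    }

    // Phi of ops: the adds happen in the predecessors, the join merges results.
    int phiOfOps(bool fromLeft, int a, int b, int c, int d) {
      return fromLeft ? (a + b) : (c + d); // phi(a+b, c+d)
    }

    int main() {
      assert(opOfPhis(true, 1, 2, 3, 4) == phiOfOps(true, 1, 2, 3, 4));
      assert(opOfPhis(false, 1, 2, 3, 4) == phiOfOps(false, 1, 2, 3, 4));
    }
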
@@ -104,12 +112,11 @@
 STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
-STATISTIC(NumGVNNotMostDominatingLeader,
-          "Number of times a member dominated it's new classes' leader");
 STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
 DEBUG_COUNTER(VNCounter, "newgvn-vn",
               "Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+              "Controls which instructions we create phi of ops for")
 // Currently store defining access refinement is too slow due to basicaa being
 // egregiously slow.  This flag lets us keep it working while we work on this
 // issue.
@@ -172,10 +179,9 @@
     }
   }
   // See if we really were the root of a component, by seeing if we still have
-  // our DFSNumber.
-  // If we do, we are the root of the component, and we have completed a
-  // component. If we do not,
-  // we are not the root of a component, and belong on the component stack.
+  // our DFSNumber.  If we do, we are the root of the component, and we have
+  // completed a component.  If we do not, we are not the root of a component,
+  // and belong on the component stack.
   if (Root.lookup(I) == OurDFS) {
     unsigned ComponentID = Components.size();
     Components.resize(Components.size() + 1);
@@ -425,6 +431,36 @@
   // Value Mappings.
   DenseMap<Value *, CongruenceClass *> ValueToClass;
   DenseMap<Value *, const Expression *> ValueToExpression;
+  // Value PHI handling, used to make equivalence between phi(op, op) and
+  // op(phi, phi).
+  // These mappings just store various data that would normally be part of the
+  // IR.
+
+  DenseSet<const Instruction *> PHINodeUses;
+  // Map a temporary instruction we created to a parent block.
+  DenseMap<const Value *, BasicBlock *> TempToBlock;
+  // Map between the temporary phis we created and the real instructions they
+  // are known equivalent to.
+  DenseMap<const Value *, PHINode *> RealToTemp;
+  DenseMap<const Value *, Instruction *> TempToReal;
+  // In order to know when we should re-process instructions that have
+  // phi-of-ops, we track the set of expressions that they needed as
+  // leaders.  When we discover new leaders for those expressions, we process
+  // the associated phi-of-op instructions again in case they have changed.  The
+  // other way they may change is if they had leaders, and those leaders
+  // disappear.  However, at the point they have leaders, there are uses of the
+  // relevant operands in the created phi node, and so they will get reprocessed
+  // through the normal user marking we perform.
+  DenseMap<Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+  DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+      ExpressionToPhiOfOps;
+
+  // Map from basic block to the temporary operations we created
+  DenseMap<const BasicBlock *, SmallVector<PHINode *, 2>> PHIOfOpsPHIs;
+  // Map from temporary operation to MemoryAccess.
+  DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+  // Set of all temporary instructions we created.
+  DenseSet<Instruction *> AllTempInstructions;
 
   // Mapping from predicate info we used to the instructions we used it with.
   // In order to correctly ensure propagation, we must keep track of what
@@ -456,8 +492,8 @@
   enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
   DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
 
-  enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
-  DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+  enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+  DenseMap<const Instruction *, InstCycleState> InstCycleState;
   // Expression to class mapping.
   using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
   ExpressionClassMap ExpressionToClass;
@@ -514,7 +550,7 @@
   const Expression *createExpression(Instruction *);
   const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *);
   PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge,
-                                     bool &AllConstant);
+                                     bool &OriginalOpsConstant);
   const VariableExpression *createVariableExpression(Value *);
   const ConstantExpression *createConstantExpression(Constant *);
   const Expression *createVariableOrConstant(Value *V);
@@ -553,6 +589,9 @@
     return CClass;
   }
   void initializeCongruenceClasses(Function &F);
+  const Expression *makePossiblePhiOfOps(Instruction *, bool,
+                                         SmallPtrSetImpl<Value *> &);
+  void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
 
   // Value number an Instruction or MemoryPhi.
   void valueNumberMemoryPhi(MemoryPhi *);
@@ -561,7 +600,8 @@
   // Symbolic evaluation.
   const Expression *checkSimplificationResults(Expression *, Instruction *,
                                                Value *);
-  const Expression *performSymbolicEvaluation(Value *);
+  const Expression *performSymbolicEvaluation(Value *,
+                                              SmallPtrSetImpl<Value *> &);
   const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
                                                 Instruction *, MemoryAccess *);
   const Expression *performSymbolicLoadEvaluation(Instruction *);
@@ -585,7 +625,7 @@
   bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
   CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
   const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
-  bool isMemoryAccessTop(const MemoryAccess *) const;
+  bool isMemoryAccessTOP(const MemoryAccess *) const;
 
   // Ranking
   unsigned int getRank(const Value *) const;
@@ -609,6 +649,7 @@
   void replaceInstruction(Instruction *, Value *);
   void markInstructionForDeletion(Instruction *);
   void deleteInstructionsInBlock(BasicBlock *);
+  Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
 
   // New instruction creation.
   void handleNewInstruction(Instruction *){};
 
@@ -620,8 +661,10 @@
   void markPredicateUsersTouched(Instruction *);
   void markValueLeaderChangeTouched(CongruenceClass *CC);
   void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+  void markPhiOfOpsChanged(const Expression *E);
   void addPredicateUsers(const PredicateBase *, Instruction *);
   void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U);
+  void addAdditionalUsers(Value *To, Value *User);
 
   // Main loop of value numbering
   void iterateTouchedInstructions();
@@ -629,12 +672,16 @@
   // Utilities.
   void cleanupTables();
   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
-  void updateProcessedCount(Value *V);
+  void updateProcessedCount(const Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
   bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
   BasicBlock *getBlockForValue(Value *V) const;
   void deleteExpression(const Expression *E);
+  MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+  MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+  MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
   unsigned InstrToDFSNum(const Value *V) const {
     assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
     return InstrDFS.lookup(V);
   }
@@ -654,8 +701,8 @@
                ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
                : InstrDFS.lookup(MA);
   }
-  bool isCycleFree(const PHINode *PN);
-  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+  bool isCycleFree(const Instruction *);
+  bool isBackedge(BasicBlock *From, BasicBlock *To) const;
   // Debug counter info.  When verifying, we have to reset the value numbering
   // debug counter to the same state it started in to get the same results.
   std::pair<int, int> StartingVNCounter;
@@ -683,20 +730,46 @@
   return true;
 }
 
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+  if (From == To)
+    return true;
+  auto *FromDTN = DT->getNode(From);
+  auto *ToDTN = DT->getNode(To);
+  return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
 #ifndef NDEBUG
 static std::string getBlockName(const BasicBlock *B) {
   return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
 }
 #endif
 
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+  auto *Result = MSSA->getMemoryAccess(I);
+  return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+  return MSSA->getMemoryAccess(BB);
+}
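The isBackedge helper above reduces backedge detection to comparing precomputed reverse-post-order numbers: a forward edge strictly increases the RPO index, so any edge that does not (including a self-loop) runs against the forward direction of the CFG. A minimal standalone sketch of the same test (hypothetical block ids and a plain map, rather than the pass's RPOOrdering over dominator-tree nodes):

    #include <unordered_map>

    struct RPOInfo {
      std::unordered_map<int, unsigned> RPONumber; // block id -> RPO index

      bool isBackedge(int From, int To) const {
        if (From == To)
          return true; // a self-loop is trivially a backedge
        // Forward edges go to a strictly larger RPO number.
        return RPONumber.at(From) >= RPONumber.at(To);
      }
    };

    int main() {
      // entry(0) -> header(1) -> latch(2), plus latch(2) -> header(1).
      RPOInfo Info{{{0, 0}, {1, 1}, {2, 2}}};
      return Info.isBackedge(2, 1) && !Info.isBackedge(1, 2) ? 0 : 1;
    }
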
 // Get the basic block from an instruction/memory value.
 BasicBlock *NewGVN::getBlockForValue(Value *V) const {
-  if (auto *I = dyn_cast<Instruction>(V))
-    return I->getParent();
-  else if (auto *MP = dyn_cast<MemoryPhi>(V))
-    return MP->getBlock();
-  llvm_unreachable("Should have been able to figure out a block for our value");
-  return nullptr;
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    auto *Parent = I->getParent();
+    if (Parent)
+      return Parent;
+    Parent = TempToBlock.lookup(V);
+    assert(Parent && "Every fake instruction should have a block");
+    return Parent;
+  }
+
+  auto *MP = dyn_cast<MemoryPhi>(V);
+  assert(MP && "Should have been an instruction or a MemoryPhi");
+  return MP->getBlock();
 }
 
 // Delete a definitely dead expression, so it can be reused by the expression
@@ -708,10 +781,9 @@
   const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
   ExpressionAllocator.Deallocate(E);
 }
-
 PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
-                                           bool &AllConstant) {
-  BasicBlock *PHIBlock = I->getParent();
+                                           bool &OriginalOpsConstant) {
+  BasicBlock *PHIBlock = getBlockForValue(I);
   auto *PN = cast<PHINode>(I);
   auto *E =
       new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
@@ -720,24 +792,22 @@
   E->setType(I->getType());
   E->setOpcode(I->getOpcode());
 
-  unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
   // Filter out unreachable phi operands.
   auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) {
     return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock});
   });
-
   std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
                  [&](const Use &U) -> Value * {
                    auto *BB = PN->getIncomingBlock(U);
-                   auto *DTN = DT->getNode(BB);
-                   if (RPOOrdering.lookup(DTN) >= PHIRPO)
-                     HasBackedge = true;
-                   AllConstant &= isa<UndefValue>(U) || isa<Constant>(U);
+                   HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+                   OriginalOpsConstant =
+                       OriginalOpsConstant && isa<Constant>(U);
                    // Don't try to transform self-defined phis.
-                   if (U == PN)
+                   if (U == PN) {
                      return PN;
+                   }
+                   return lookupOperandLeader(U);
                  });
   return E;
@@ -758,7 +828,7 @@
   // whether all members are constant.
   std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
     auto Operand = lookupOperandLeader(O);
-    AllConstant &= isa<Constant>(Operand);
+    AllConstant = AllConstant && isa<Constant>(Operand);
     return Operand;
   });
 
@@ -1026,7 +1096,7 @@
 // Return true if the MemoryAccess is really equivalent to everything. This is
 // equivalent to the lattice value "TOP" in most lattices.  This is the initial
 // state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
   return getMemoryClass(MA) == TOPClass;
 }
 
@@ -1072,7 +1142,7 @@
   // Unlike loads, we never try to eliminate stores, so we do not check if they
   // are simple and avoid value numbering them.
   auto *SI = cast<StoreInst>(I);
-  auto *StoreAccess = MSSA->getMemoryAccess(SI);
+  auto *StoreAccess = getMemoryAccess(SI);
   // Get the expression, if any, for the RHS of the MemoryDef.
   const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
   if (EnableStoreRefinement)
@@ -1109,7 +1179,7 @@
       dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
     if ((lookupOperandLeader(LI->getPointerOperand()) ==
          lookupOperandLeader(SI->getPointerOperand())) &&
-        (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+        (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
          StoreRHS))
       return createVariableExpression(LI);
   }
@@ -1213,8 +1283,9 @@
   // Load of undef is undef.
   if (isa<UndefValue>(LoadAddressLeader))
     return createConstantExpression(UndefValue::get(LI->getType()));
-
-  MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+  MemoryAccess *OriginalAccess = getMemoryAccess(I);
+  MemoryAccess *DefiningAccess =
+      MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
 
   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1403,34 +1474,37 @@
   return Changed;
 }
 
-// Determine if a phi is cycle-free.  That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) {
-  // In order to compute cycle-freeness, we do SCC finding on the phi, and see
+// Determine if an instruction is cycle-free.  That means the values in the
+// instruction don't depend on any expressions that can change value as a
+// result of the instruction.  For example, a non-cycle-free instruction would
+// be v = phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) {
+  // In order to compute cycle-freeness, we do SCC finding on the instruction, and see
   // what kind of SCC it ends up in.  If it is a singleton, it is cycle-free.
   // If it is not in a singleton, it is only cycle free if the other members are
   // all phi nodes (as they do not compute anything, they are copies). TODO:
   // There are likely a few other intrinsics or expressions that could be
   // included here, but this happens so infrequently already that it is not
   // likely to be worth it.
-  auto PCS = PhiCycleState.lookup(PN);
-  if (PCS == PCS_Unknown) {
-    SCCFinder.Start(PN);
-    auto &SCC = SCCFinder.getComponentFor(PN);
+  auto ICS = InstCycleState.lookup(I);
+  if (ICS == ICS_Unknown) {
+    SCCFinder.Start(I);
+    auto &SCC = SCCFinder.getComponentFor(I);
    // It's cycle-free if its size is 1 or the SCC is *only* phi nodes.
     if (SCC.size() == 1)
-      PhiCycleState.insert({PN, PCS_CycleFree});
+      InstCycleState.insert({I, ICS_CycleFree});
     else {
       bool AllPhis =
           llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
-      PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+      ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
       for (auto *Member : SCC)
         if (auto *MemberPhi = dyn_cast<PHINode>(Member))
-          PhiCycleState.insert({MemberPhi, PCS});
+          InstCycleState.insert({MemberPhi, ICS});
     }
   }
-  if (PCS == PCS_Cycle)
+  if (ICS == ICS_Cycle)
     return false;
   return true;
 }
@@ -1495,8 +1569,18 @@
   // constants, or all operands are ignored but the undef, it also must be
   // cycle free.
   if (!AllConstant && HasBackedge && NumOps > 0 &&
-      !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
-    return E;
+      !isa<UndefValue>(AllSameValue)) {
+    if (!isCycleFree(I))
+      return E;
+#if 0
+    // Note: For a phi of ops, it can only be cycle free if it is cycle
+    // free, *and* the instruction it represents is cycle free, since we
+    // have implicit copies due to assigning the same congruence class.
+    if (AllTempInstructions.count(I) &&
+        !isCycleFree(cast<Instruction>(TempToReal.lookup(I))))
+      return E;
+#endif
+  }
 
   // Only have to check for instructions
   if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
@@ -1665,8 +1749,17 @@
     return createExpression(I);
 }
 
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function.  We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+  return isa<Constant>(V) || isa<Argument>(V);
+}
+
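For intuition on the SCC test in isCycleFree above: in v = phi(0, v+1), the phi and the add form a two-node SCC that contains a non-phi, so v is cyclic; a value whose SCC is a singleton, or consists only of phis (pure copies), is cycle-free. A toy version of just the classification rule, over an SCC assumed already computed (a sketch, not the pass's SCCFinder):

    #include <vector>

    enum class CycleState { CycleFree, Cyclic };

    struct Node {
      bool IsPhi;
    };

    // Singleton SCCs are trivially cycle-free; larger ones are cycle-free only
    // if every member is a phi, since phis compute nothing (they are copies).
    CycleState classifySCC(const std::vector<Node> &SCC) {
      if (SCC.size() == 1)
        return CycleState::CycleFree;
      for (const Node &N : SCC)
        if (!N.IsPhi)
          return CycleState::Cyclic;
      return CycleState::CycleFree;
    }

    int main() {
      // v = phi(0, v+1): SCC = {phi, add} -> cyclic.
      std::vector<Node> PhiPlusAdd = {{true}, {false}};
      return classifySCC(PhiPlusAdd) == CycleState::Cyclic ? 0 : 1;
    }
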
 // Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V, SmallPtrSetImpl<Value *> &Visited) {
   const Expression *E = nullptr;
   if (auto *C = dyn_cast<Constant>(V))
     E = createConstantExpression(C);
@@ -1740,16 +1833,34 @@
     default:
       return nullptr;
     }
+
+    if (E && !isa<ConstantExpression>(E) && !isa<VariableExpression>(E) &&
+        PHINodeUses.count(I)) {
+      // FIXME: Backedge argument
+      auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+      if (PHIE)
+        return PHIE;
+    }
   }
   return E;
 }
 
+void NewGVN::addAdditionalUsers(Value *To, Value *User) {
+  AdditionalUsers[To].insert(User);
+}
+
 void NewGVN::markUsersTouched(Value *V) {
   // Now mark the users as touched.
   for (auto *User : V->users()) {
     assert(isa<Instruction>(User) && "Use of value not within an instruction?");
     TouchedInstructions.set(InstrToDFSNum(User));
   }
+  const auto Result = AdditionalUsers.find(V);
+  if (Result != AdditionalUsers.end()) {
+    for (auto *User : Result->second)
+      TouchedInstructions.set(InstrToDFSNum(User));
+    AdditionalUsers.erase(Result);
+  }
 }
 
 void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) {
@@ -1776,6 +1887,10 @@
 
 // Add I to the set of users of a given predicate.
 void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) {
+  // This is a slight hack. When doing phi of ops, we set the instruction
+  // numbers to be the right one, and then this ensures we don't add any deleted
+  // operands to the predicate user list.
+  I = cast<Instruction>(InstrFromDFSNum(InstrToDFSNum(I)));
   if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
     PredicateToUsers[PBranch->Condition].insert(I);
   else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
@@ -1832,11 +1947,11 @@
          "Can't get next leader if there is none");
   if (CC->getStoreCount() > 0) {
     if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
-      return MSSA->getMemoryAccess(NL);
+      return getMemoryAccess(NL);
     // Find the store with the minimum DFS number.
     auto *V = getMinDFSOfRange<Value>(make_filter_range(
         *CC, [&](const Value *V) { return isa<StoreInst>(V); }));
-    return MSSA->getMemoryAccess(cast<StoreInst>(V));
+    return getMemoryAccess(cast<StoreInst>(V));
   }
   assert(CC->getStoreCount() == 0);
@@ -1920,32 +2035,11 @@
                                            CongruenceClass *NewClass) {
   if (I == OldClass->getNextLeader().first)
     OldClass->resetNextLeader();
-
-  // It's possible, though unlikely, for us to discover equivalences such
-  // that the current leader does not dominate the old one.
-  // This statistic tracks how often this happens.
-  // We assert on phi nodes when this happens, currently, for debugging, because
-  // we want to make sure we name phi node cycles properly.
-  if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() &&
-      I != NewClass->getLeader()) {
-    auto *IBB = I->getParent();
-    auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent();
-    bool Dominated =
-        IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader());
-    Dominated = Dominated || DT->properlyDominates(IBB, NCBB);
-    if (Dominated) {
-      ++NumGVNNotMostDominatingLeader;
-      assert(
-          !isa<PHINode>(I) &&
-          "New class for instruction should not be dominated by instruction");
-    }
-  }
-
+  OldClass->erase(I);
+  NewClass->insert(I);
   if (NewClass->getLeader() != I)
     NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)});
-  OldClass->erase(I);
-  NewClass->insert(I);
 
   // Handle our special casing of stores.
   if (auto *SI = dyn_cast<StoreInst>(I)) {
     OldClass->decStoreCount();
@@ -1980,7 +2074,7 @@
   // instructions before.
   // If it's not a memory use, set the MemoryAccess equivalence
-  auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+  auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
   bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA;
   if (InstMA)
     moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
@@ -2038,20 +2132,33 @@
   }
 }
 
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const Expression *E) {
+  auto PhiOfOpsSet = ExpressionToPhiOfOps.find(E);
+  if (PhiOfOpsSet != ExpressionToPhiOfOps.end()) {
+    for (auto I : PhiOfOpsSet->second)
+      TouchedInstructions.set(InstrToDFSNum(I));
+    ExpressionToPhiOfOps.erase(PhiOfOpsSet);
+  }
+}
+
 // Perform congruence finding on a given value numbering expression.
 void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
   ValueToExpression[I] = E;
+
+  assert(!TempToReal.lookup(I) &&
+         "Should not congruence find fake instructions");
   // This is guaranteed to return something, since it will at least find
   // TOP.
-
   CongruenceClass *IClass = ValueToClass[I];
   assert(IClass && "Should have found a IClass");
   // Dead classes should have been eliminated from the mapping.
   assert(!IClass->isDead() && "Found a dead class");
 
-  CongruenceClass *EClass;
+  CongruenceClass *EClass = nullptr;
   if (const auto *VE = dyn_cast<VariableExpression>(E)) {
-    EClass = ValueToClass[VE->getVariableValue()];
+    EClass = ValueToClass.lookup(VE->getVariableValue());
   } else {
     auto lookupResult = ExpressionToClass.insert({E, nullptr});
@@ -2102,10 +2209,13 @@
   if (ClassChanged || LeaderChanged) {
     DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
                  << "\n");
-    if (ClassChanged)
+    if (ClassChanged) {
       moveValueToNewCongruenceClass(I, E, IClass, EClass);
+      markPhiOfOpsChanged(E);
+    }
+
     markUsersTouched(I);
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+    if (MemoryAccess *MA = getMemoryAccess(I))
       markMemoryUsersTouched(MA);
     if (auto *CI = dyn_cast<CmpInst>(I))
       markPredicateUsersTouched(CI);
@@ -2131,7 +2241,7 @@
     // impact predicates. Otherwise, only mark the phi nodes as touched, as
     // they are the only thing that depend on new edges. Anything using their
     // values will get propagated to if necessary.
-    if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+    if (MemoryAccess *MemPhi = getMemoryAccess(To))
       TouchedInstructions.set(InstrToDFSNum(MemPhi));
 
     auto BI = To->begin();
@@ -2139,6 +2249,12 @@
       TouchedInstructions.set(InstrToDFSNum(&*BI));
       ++BI;
     }
+    const auto PHIResult = PHIOfOpsPHIs.find(To);
+    if (PHIResult != PHIOfOpsPHIs.end()) {
+      const auto &PHIs = PHIResult->second;
+      for (auto I : PHIs)
+        TouchedInstructions.set(InstrToDFSNum(I));
+    }
   }
 }
 
@@ -2228,7 +2344,7 @@
   // This also may be a memory defining terminator, in which case, set it
   // equivalent only to itself.
   //
-  auto *MA = MSSA->getMemoryAccess(TI);
+  auto *MA = getMemoryAccess(TI);
   if (MA && !isa<MemoryUse>(MA)) {
     auto *CC = ensureLeaderOfMemoryClass(MA);
     if (setMemoryClass(MA, CC))
      markMemoryUsersTouched(MA);
   }
 }
 
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+                         Instruction *ExistingValue) {
+  InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+  // InstrDFS[Op] = MaxDFSNum++;
+  // DFSToInstr.push_back(Op);
+  AllTempInstructions.insert(Op);
+  PHIOfOpsPHIs[BB].push_back(Op);
+  TempToBlock[Op] = BB;
+  if (ExistingValue) {
+    RealToTemp[ExistingValue] = Op;
+    TempToReal[Op] = ExistingValue;
+  }
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+  return isa(I) || isa(I) || isa(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+                             SmallPtrSetImpl<Value *> &Visited) {
+  if (!okayForPHIOfOps(I))
+    return nullptr;
+
+  if (!Visited.insert(I).second)
+    return nullptr;
+  // For now, we require the instruction be cycle free because we don't
+  // *always* create a phi of ops for instructions that could be done as phi
+  // of ops, we only do it if we think it is useful.  If we did do it all the
+  // time, we could remove the cycle free check.
+  if (!isCycleFree(I))
+    return nullptr;
+
+  unsigned IDFSNum = InstrToDFSNum(I);
+  // Pretty much all of the instructions we can convert to phi of ops over a
+  // backedge that are adds, are really induction variables, and those are
+  // pretty much pointless to convert.  This is very coarse-grained for a
+  // test, so if we do find some value, we can change it later.
+  // But otherwise, what can happen is we convert the induction variable from
+  //
+  // i = phi (0, tmp)
+  // tmp = i + 1
+  //
+  // to
+  // i = phi (0, tmpphi)
+  // tmpphi = phi(1, tmpphi+1)
+  //
+  // Which we don't want to happen.  We could just avoid this for all non-cycle
+  // free phis, and we may go that route.
+  if (HasBackedge && I->getOpcode() == Instruction::Add)
+    return nullptr;
+
+  SmallPtrSet ProcessedPHIs;
+  // TODO: We don't do phi translation on memory accesses because it's
+  // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+  // which we don't have a good way of doing ATM.
+  auto *MemAccess = getMemoryAccess(I);
+  // If the memory operation is defined by a memory operation in this block
+  // that isn't a MemoryPhi, transforming the pointer backwards through a
+  // scalar phi can't help, as it would still be killed by that memory
+  // operation.
+  if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+      MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+    return nullptr;
+
+  // Convert op of phis to phi of ops
+  for (auto &Op : I->operands()) {
+    if (!isa<PHINode>(Op))
+      continue;
+    auto *OpPHI = cast<PHINode>(Op);
+    // No point in doing this for one-operand phis.
+    if (OpPHI->getNumOperands() == 1)
+      continue;
+    if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+      return nullptr;
+    SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+    auto *PHIBlock = getBlockForValue(OpPHI);
+    for (auto PredBB : OpPHI->blocks()) {
+      Value *FoundVal = nullptr;
+      // We could just skip unreachable edges entirely but it's tricky to do
+      // with existing phi nodes.
+      if (ReachableEdges.count({PredBB, PHIBlock})) {
+        Instruction *ValueOp = I->clone();
+        auto Iter = TempToMemory.end();
+        if (MemAccess)
+          Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+        for (auto &Op : ValueOp->operands()) {
+          Op = Op->DoPHITranslation(PHIBlock, PredBB);
+          addAdditionalUsers(Op, I);
+        }
+        InstrDFS.insert({ValueOp, IDFSNum});
+        const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+        InstrDFS.erase(ValueOp);
+        delete ValueOp;
+
+        if (MemAccess)
+          TempToMemory.erase(Iter);
+        if (!E)
+          return nullptr;
+        if (!isa(E))
+          ExpressionToPhiOfOps[E].insert(I);
+        FoundVal = findPhiOfOpsLeader(E, PredBB);
+        if (!FoundVal)
+          return nullptr;
+      } else {
+        DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+                     << getBlockName(PredBB)
+                     << " because the block is unreachable\n");
+        FoundVal = UndefValue::get(I->getType());
+      }
+
+      Ops.push_back({FoundVal, PredBB});
+      DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+                   << getBlockName(PredBB) << "\n");
+    }
+    auto *ValuePHI = RealToTemp.lookup(I);
+    bool NewPHI = false;
+    if (!ValuePHI) {
+      ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+      addPhiOfOps(ValuePHI, PHIBlock, I);
+      NewPHI = true;
+    }
+    if (NewPHI) {
+      for (auto PHIOp : Ops)
+        ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+    } else {
+      unsigned int i = 0;
+      for (auto PHIOp : Ops) {
+        ValuePHI->setIncomingValue(i, PHIOp.first);
+        ValuePHI->setIncomingBlock(i, PHIOp.second);
+        ++i;
+      }
+    }
+
+    DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+                 << "\n");
+    return performSymbolicEvaluation(ValuePHI, Visited);
+  }
+  return nullptr;
+}
+
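The per-predecessor loop above clones the instruction and rewrites each operand with DoPHITranslation, so a use of p = phi(a, c) becomes a on one incoming edge and c on the other; symbolic evaluation of the translated clone then asks whether each predecessor already has a leader for the resulting expression. A schematic of the translation step alone (hypothetical flat tables instead of LLVM's use lists):

    #include <map>
    #include <utility>
    #include <vector>

    using ValueId = int;
    using BlockId = int;

    // Incoming values of each phi: {phi, predecessor} -> incoming value.
    using PhiTable = std::map<std::pair<ValueId, BlockId>, ValueId>;

    // Translate one operand through the phis of the join block: if V is a phi,
    // use its incoming value for Pred; otherwise V is unchanged.
    ValueId phiTranslate(ValueId V, BlockId Pred, const PhiTable &Phis) {
      auto It = Phis.find({V, Pred});
      return It == Phis.end() ? V : It->second;
    }

    // Translate all operands of an op, yielding the per-predecessor operation
    // whose value is then looked up (e.g. p + q becomes a + b in one pred).
    std::vector<ValueId> translateOperands(const std::vector<ValueId> &Ops,
                                           BlockId Pred, const PhiTable &Phis) {
      std::vector<ValueId> Out;
      for (ValueId Op : Ops)
        Out.push_back(phiTranslate(Op, Pred, Phis));
      return Out;
    }

    int main() {
      // p(10) = phi(a=1, c=3), q(11) = phi(b=2, d=4).
      PhiTable Phis = {{{10, 0}, 1}, {{10, 1}, 3}, {{11, 0}, 2}, {{11, 1}, 4}};
      auto InPred0 = translateOperands({10, 11}, 0, Phis); // {a, b}
      return InPred0[0] == 1 && InPred0[1] == 2 ? 0 : 1;
    }
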
 // The algorithm initially places the values of the routine in the TOP
 // congruence class. The leader of TOP is the undetermined value `undef`.
 // When the algorithm has finished, values still in TOP are unreachable.
@@ -2279,6 +2541,12 @@
       TOPClass->incStoreCount();
   }
   for (auto &I : *BB) {
+    // TODO: Move to helper
+    if (isa<PHINode>(&I))
+      for (auto *U : I.users())
+        if (auto *UInst = dyn_cast<Instruction>(U))
+          if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+            PHINodeUses.insert(UInst);
     // Don't insert void terminators into the class. We don't value number
     // them, and they just end up sitting in TOP.
     if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2303,12 +2571,36 @@
       CongruenceClasses[i] = nullptr;
   }
 
+  // Destroy the value expressions
+  SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+                                         AllTempInstructions.end());
+  AllTempInstructions.clear();
+
+  // We have to drop all references for everything first, so there are no uses
+  // left as we delete them.
+  for (auto *I : TempInst) {
+    I->dropAllReferences();
+  }
+
+  while (!TempInst.empty()) {
+    auto *I = TempInst.back();
+    TempInst.pop_back();
+    delete I;
+  }
+
   ValueToClass.clear();
   ArgRecycler.clear(ExpressionAllocator);
   ExpressionAllocator.Reset();
   CongruenceClasses.clear();
   ExpressionToClass.clear();
   ValueToExpression.clear();
+  RealToTemp.clear();
+  TempToReal.clear();
+  AdditionalUsers.clear();
+  ExpressionToPhiOfOps.clear();
+  TempToBlock.clear();
+  TempToMemory.clear();
+  PHIOfOpsPHIs.clear();
   ReachableBlocks.clear();
   ReachableEdges.clear();
 #ifndef NDEBUG
@@ -2324,14 +2616,17 @@
   MemoryToUsers.clear();
 }
 
+// Assign local DFS number mapping to instructions, and leave space for Value
+// PHI's.
 std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
                                                        unsigned Start) {
   unsigned End = Start;
-  if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+  if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
     InstrDFS[MemPhi] = End++;
     DFSToInstr.emplace_back(MemPhi);
   }
 
+  // Then the real block goes next.
   for (auto &I : *B) {
     // There's no need to call isInstructionTriviallyDead more than once on
     // an instruction. Therefore, once we know that an instruction is dead
@@ -2342,7 +2637,6 @@
       markInstructionForDeletion(&I);
       continue;
     }
-
     InstrDFS[&I] = End++;
     DFSToInstr.emplace_back(&I);
   }
@@ -2353,7 +2647,7 @@
   return std::make_pair(Start, End);
 }
 
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
 #ifndef NDEBUG
   if (ProcessedCount.count(V) == 0) {
     ProcessedCount.insert({V, 1});
@@ -2372,7 +2666,7 @@
   const BasicBlock *PHIBlock = MP->getBlock();
   auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
     return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
-           !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+           !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
           ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
   });
   // If all that is left is nothing, our memoryphi is undef. We keep it as
@@ -2425,18 +2719,17 @@
     DEBUG(dbgs() << "Processing instruction " << *I << "\n");
     if (!I->isTerminator()) {
       const Expression *Symbolized = nullptr;
+      SmallPtrSet<Value *, 2> Visited;
      if (DebugCounter::shouldExecute(VNCounter)) {
-        Symbolized = performSymbolicEvaluation(I);
+        Symbolized = performSymbolicEvaluation(I, Visited);
       } else {
         // Mark the instruction as unused so we don't value number it again.
         InstrDFS[I] = 0;
       }
       // If we couldn't come up with a symbolic expression, use the unknown
       // expression
-      if (Symbolized == nullptr) {
+      if (Symbolized == nullptr)
         Symbolized = createUnknownExpression(I);
-      }
-
       performCongruenceFinding(I, Symbolized);
     } else {
       // Handle terminators that return values. All of them produce values we
@@ -2527,13 +2820,15 @@
         return false;
       if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
        return !isInstructionTriviallyDead(MemDef->getMemoryInst());
+      if (auto *MemPhi = dyn_cast<MemoryPhi>(Pair.first))
+        return all_of(MemPhi->operands(), [&](const Value *V) {
+          return isMemoryAccessTOP(cast<MemoryAccess>(V));
+        });
       return true;
     };
 
   auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
   for (auto KV : Filtered) {
-    assert(KV.second != TOPClass &&
-           "Memory not unreachable but ended up in TOP");
     if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
       auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
       if (FirstMUD && SecondMUD)
@@ -2627,7 +2922,7 @@
   // Nothing set, nothing to iterate, just return.
   if (FirstInstr == -1)
     return;
-  BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+  const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
   while (TouchedInstructions.any()) {
     ++Iterations;
     // Walk through all the instructions in all the blocks in RPO.
@@ -2645,7 +2940,7 @@
       }
 
       Value *V = InstrFromDFSNum(InstrNum);
-      BasicBlock *CurrBlock = getBlockForValue(V);
+      const BasicBlock *CurrBlock = getBlockForValue(V);
 
       // If we hit a new block, do reachability processing.
       if (CurrBlock != LastBlock) {
@@ -2726,6 +3021,7 @@
     BlockInstRange.insert({B, BlockRange});
     ICount += BlockRange.second - BlockRange.first;
   }
+  initializeCongruenceClasses(F);
 
   TouchedInstructions.resize(ICount);
   // Ensure we don't end up resizing the expressionToClass map, as
   // that can be quite expensive. At most, we have one expression per
   // instruction.
   ExpressionToClass.reserve(ICount * 2);
 
   // Initialize the touched instructions to include the entry block.
   const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
   TouchedInstructions.set(InstRange.first, InstRange.second);
+  DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+               << " marked reachable\n");
   ReachableBlocks.insert(&F.getEntryBlock());
-  initializeCongruenceClasses(F);
 
   iterateTouchedInstructions();
   verifyMemoryCongruency();
   verifyIterationSettled(F);
@@ -2750,7 +3047,8 @@
     if (!ToErase->use_empty())
       ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
 
-    ToErase->eraseFromParent();
+    if (ToErase->getParent())
+      ToErase->eraseFromParent();
   }
 
   // Delete all unreachable blocks.
@@ -2769,14 +3067,6 @@
   return Changed;
 }
 
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function. We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
-  return isa<Constant>(V) || isa<Argument>(V);
-}
-
 struct NewGVN::ValueDFS {
   int DFSIn = 0;
   int DFSOut = 0;
@@ -2866,9 +3156,21 @@
   }
   assert(isa<Instruction>(D) &&
          "The dense set member should always be an instruction");
-  VDDef.LocalNum = InstrToDFSNum(D);
-  DFSOrderedSet.emplace_back(VDDef);
   Instruction *Def = cast<Instruction>(D);
+  VDDef.LocalNum = InstrToDFSNum(D);
+  DFSOrderedSet.push_back(VDDef);
+  // If there is a phi node equivalent, add it
+  if (auto *PN = RealToTemp.lookup(Def)) {
+    auto *PHIE =
+        dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+    if (PHIE) {
+      VDDef.Def.setInt(false);
+      VDDef.Def.setPointer(PN);
+      VDDef.LocalNum = 0;
+      DFSOrderedSet.push_back(VDDef);
+    }
+  }
+
   unsigned int UseCount = 0;
   // Now add the uses.
   for (auto &U : Def->uses()) {
@@ -2885,7 +3187,7 @@
       // they are from.
       VDUse.LocalNum = InstrDFS.size() + 1;
     } else {
-      IBlock = I->getParent();
+      IBlock = getBlockForValue(I);
       VDUse.LocalNum = InstrToDFSNum(I);
     }
@@ -3055,6 +3357,37 @@
 };
 }
 
+// Given a value and a basic block we are trying to see if it is available in,
+// see if the value has a leader available in that block.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+                                  const BasicBlock *BB) const {
+  // It would already be constant if we could make it constant
+  if (auto *CE = dyn_cast<ConstantExpression>(E))
+    return CE->getConstantValue();
+  if (auto *VE = dyn_cast<VariableExpression>(E))
+    return VE->getVariableValue();
+
+  auto *CC = ExpressionToClass.lookup(E);
+  if (!CC)
+    return nullptr;
+  if (alwaysAvailable(CC->getLeader()))
+    return CC->getLeader();
+
+  for (auto Member : *CC) {
+    auto *MemberInst = dyn_cast<Instruction>(Member);
+    // Anything that isn't an instruction is always available.
+    if (!MemberInst)
+      return Member;
+    // If we are looking for something in the same block as the member, it must
+    // be a leader because this function is looking for operands for a phi node.
+    if (MemberInst->getParent() == BB ||
+        DT->dominates(MemberInst->getParent(), BB)) {
+      return Member;
+    }
+  }
+  return nullptr;
+}
+
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
@@ -3084,26 +3417,45 @@
   // Since we are going to walk the domtree anyway, and we can't guarantee the
   // DFS numbers are updated, we compute some ourselves.
   DT->updateDFSNumbers();
-
-  for (auto &B : F) {
-    if (!ReachableBlocks.count(&B)) {
-      for (const auto S : successors(&B)) {
-        for (auto II = S->begin(); isa<PHINode>(II); ++II) {
-          auto &Phi = cast<PHINode>(*II);
-          DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
-                       << getBlockName(&B)
-                       << " with undef due to it being unreachable\n");
-          for (auto &Operand : Phi.incoming_values())
-            if (Phi.getIncomingBlock(Operand) == &B)
-              Operand.set(UndefValue::get(Phi.getType()));
-        }
-      }
-    }
-  }
+  auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+    for (auto &Operand : PHI.incoming_values())
+      if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+        DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+                     << getBlockName(PHI.getIncomingBlock(Operand))
+                     << " with undef due to it being unreachable\n");
+        Operand.set(UndefValue::get(PHI.getType()));
+      }
+  };
+  SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+  for (auto &B : F)
+    if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+        (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+      BlocksWithPhis.insert(&B);
+  DenseMap<const BasicBlock *, unsigned int> ReachablePredCount;
+  for (auto KV : ReachableEdges)
+    ReachablePredCount[KV.getEnd()]++;
+  for (auto *BB : BlocksWithPhis)
+    // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
+    // the block and subtract the pred count, but it's more complicated.
+    if (ReachablePredCount.lookup(BB) !=
+        std::distance(pred_begin(BB), pred_end(BB))) {
+      for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+        auto &PHI = cast<PHINode>(*II);
+        ReplaceUnreachablePHIArgs(PHI, BB);
+      }
+      auto PHIResult = PHIOfOpsPHIs.find(BB);
+      if (PHIResult != PHIOfOpsPHIs.end()) {
+        auto &PHIs = PHIResult->second;
+        for (auto I : PHIs) {
+          auto *PHI = dyn_cast<PHINode>(I);
+          ReplaceUnreachablePHIArgs(*PHI, BB);
+        }
+      }
+    }
 
   // Map to store the use counts
   DenseMap<const Value *, unsigned int> UseCounts;
-  for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+  for (auto *CC : reverse(CongruenceClasses)) {
     // Track the equivalent store info so we can decide whether to try
     // dead store elimination.
     SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3151,7 +3503,7 @@
     DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
                  << "\n");
     // If this is a singleton, we can skip it.
-    if (CC->size() != 1) {
+    if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
       // This is a stack because equality replacement/etc may place
       // constants in the middle of the member list, and we want to use
      // those constant values in preference to the current leader, over
@@ -3173,6 +3525,21 @@
         // We ignore void things because we can't get a value from them.
        if (Def && Def->getType()->isVoidTy())
           continue;
+        auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+        if (DefInst && AllTempInstructions.count(DefInst)) {
+          auto *PN = cast<PHINode>(DefInst);
+
+          // If this is a value phi and that's the expression we used, insert
+          // it into the program and remove it from the temp instruction list.
+          AllTempInstructions.erase(PN);
+          auto *DefBlock = getBlockForValue(Def);
+          DEBUG(dbgs() << "Inserting fully real phi of ops " << *Def
+                       << " into block "
+                       << getBlockName(getBlockForValue(Def)) << "\n");
+          PN->insertBefore(&DefBlock->front());
+          Def = PN;
+        }
 
         if (EliminationStack.empty()) {
           DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3331,7 +3698,6 @@
       }
     }
   }
-
   return AnythingReplaced;
 }
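The eliminator above leans on one fact about dominator-tree DFS numbers: A dominates B exactly when A's [DFSIn, DFSOut] interval encloses B's, so sweeping a congruence class in DFS order with a stack of still-open leaders finds each member's nearest dominating leader in a single pass. A deliberately simplified sketch of that interval logic (assumed precomputed intervals; not the pass's ValueDFS machinery, which also handles ranks and stores):

    #include <utility>
    #include <vector>

    struct ValueDFS {
      int DFSIn, DFSOut; // dominator-tree DFS interval of the defining block
      int Id;            // stand-in for the value itself
    };

    // Sweep members (already sorted by DFSIn) keeping a stack of open leaders;
    // the top of stack dominates a member iff its interval encloses it.
    std::vector<std::pair<int, int>>
    eliminate(const std::vector<ValueDFS> &Sorted) {
      std::vector<std::pair<int, int>> Replacements; // {member, leader}
      std::vector<ValueDFS> Stack;
      for (const ValueDFS &M : Sorted) {
        while (!Stack.empty() && M.DFSOut > Stack.back().DFSOut)
          Stack.pop_back(); // the leader's dominated region has ended
        if (!Stack.empty())
          Replacements.push_back({M.Id, Stack.back().Id});
        else
          Stack.push_back(M); // first available leader for this region
      }
      return Replacements;
    }

    int main() {
      // A=[1,10] dominates B=[3,6]; C=[12,15] lies outside A's region.
      auto R = eliminate({{1, 10, 100}, {3, 6, 101}, {12, 15, 102}});
      return R.size() == 1 && R[0] == std::make_pair(101, 100) ? 0 : 1;
    }
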
@@ -3341,11 +3707,12 @@
 // we will simplify an operation with all constants so that it doesn't matter
 // what order they appear in.
 unsigned int NewGVN::getRank(const Value *V) const {
-  // Prefer undef to anything else
+  // Prefer constants to undef to anything else
+  // Undef is a constant, have to check it first.
   if (isa<UndefValue>(V))
-    return 0;
-  if (isa<Constant>(V))
     return 1;
+  if (isa<Constant>(V))
+    return 0;
   else if (auto *A = dyn_cast<Argument>(V))
     return 2 + A->getArgNo();
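After this change, the canonical order getRank induces, lowest rank first, is: constants (0), undef (1), then arguments (2 + argument index), with all other values ranked higher by machinery not shown in this hunk; the pass uses it to put commutative operands in a canonical order so equivalent expressions hash the same. A toy mirror of just the policy visible above (hypothetical kinds; the fallback rank is an assumption for illustration):

    #include <cassert>

    enum class Kind { Constant, Undef, Argument, Other };

    unsigned toyRank(Kind K, unsigned ArgNo = 0) {
      switch (K) {
      case Kind::Constant: return 0; // most preferred canonical operand
      case Kind::Undef:    return 1; // undef is a Constant, so test it first
      case Kind::Argument: return 2 + ArgNo;
      case Kind::Other:    break;
      }
      return ~0u; // other values rank after arguments (details elided here)
    }

    int main() {
      // Canonicalization prefers the constant, then undef, then arguments.
      assert(toyRank(Kind::Constant) < toyRank(Kind::Undef));
      assert(toyRank(Kind::Undef) < toyRank(Kind::Argument, 0));
      assert(toyRank(Kind::Argument, 3) < toyRank(Kind::Other));
    }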
We need to be able to bitcast to integer. +- if (LoadTy->isStructTy() || LoadTy->isArrayTy()) +- return -1; +- +- int64_t StoreOffset = 0, LoadOffset = 0; +- Value *StoreBase = +- GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, *DL); +- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, *DL); +- if (StoreBase != LoadBase) +- return -1; +- +- // If the load and store don't overlap at all, the store doesn't provide +- // anything to the load. In this case, they really don't alias at all, AA +- // must have gotten confused. +- uint64_t LoadSize = DL->getTypeSizeInBits(LoadTy); +- +- if ((WriteSizeInBits & 7) | (LoadSize & 7)) +- return -1; +- uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. +- LoadSize >>= 3; +- +- bool isAAFailure = false; +- if (StoreOffset < LoadOffset) +- isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; +- else +- isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; +- +- if (isAAFailure) { +- return -1; +- } +- +- // If the Load isn't completely contained within the stored bits, we don't +- // have all the bits to feed it. We could do something crazy in the future +- // (issue a smaller load then merge the bits in) but this seems unlikely to be +- // valuable. +- if (StoreOffset > LoadOffset || +- StoreOffset + StoreSize < LoadOffset + LoadSize) +- return -1; +- +- // Okay, we can do this transformation. Return the number of bytes into the +- // store that the load is. +- return LoadOffset - StoreOffset; +-} +- +-/// This function is called when we have a +-/// memdep query of a load that ends up being a clobbering store. +-int NewGVN::analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, +- StoreInst *DepSI) { +- +- // Cannot handle reading from store of first-class aggregate yet. +- if (DepSI->getValueOperand()->getType()->isStructTy() || +- DepSI->getValueOperand()->getType()->isArrayTy()) +- return -1; +- +- Value *StorePtr = DepSI->getPointerOperand(); +- uint64_t StoreSize = +- DL->getTypeSizeInBits(DepSI->getValueOperand()->getType()); +- return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize); +-} +- +-/// This function is called when we have a +-/// memdep query of a load that ends up being clobbered by another load. See if +-/// the other load can feed into the second load. +-int NewGVN::analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, +- LoadInst *DepLI) { +- // Cannot handle reading from store of first-class aggregate yet. +- if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) +- return -1; +- +- Value *DepPtr = DepLI->getPointerOperand(); +- uint64_t DepSize = DL->getTypeSizeInBits(DepLI->getType()); +- int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize); +- +- if (Offset != -1) { +- // If the size is too large and we will have to widen, ensure we pass the +- // widening rules below +- unsigned SrcValSize = DL->getTypeStoreSize(DepLI->getType()); +- unsigned LoadSize = DL->getTypeStoreSize(LoadTy); +- if (Offset + LoadSize <= SrcValSize) +- return Offset; +- } +- +- // If we have a load/load clobber an DepLI can be widened to cover this load, +- // then we should widen it! 
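The analyzeLoadFromClobbering* family above all funnel into the same overlap arithmetic. As a rough standalone restatement (a hypothetical helper, plain integers in place of DataLayout queries, not part of the patch):

#include <cstdint>

// Returns the byte offset of the load within the stored bytes, or -1 when
// the store cannot supply every bit of the load. Offsets are relative to a
// shared base pointer, mirroring GetPointerBaseWithConstantOffset above.
static int64_t loadOffsetInStore(int64_t StoreOffset, uint64_t StoreSize,
                                 int64_t LoadOffset, uint64_t LoadSize) {
  // Disjoint ranges: AA reported a clobber but nothing actually overlaps.
  if (StoreOffset + (int64_t)StoreSize <= LoadOffset ||
      LoadOffset + (int64_t)LoadSize <= StoreOffset)
    return -1;
  // The load must be fully contained in the store to forward its bits.
  if (StoreOffset > LoadOffset ||
      StoreOffset + (int64_t)StoreSize < LoadOffset + (int64_t)LoadSize)
    return -1;
  return LoadOffset - StoreOffset;
}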
+-  // If we have a load/load clobber and DepLI can be widened to cover this
+-  // load, then we should widen it!
+-  int64_t LoadOffs = 0;
+-  const Value *LoadBase =
+-      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, *DL);
+-  unsigned LoadSize = DL->getTypeStoreSize(LoadTy);
+-
+-  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+-      LoadBase, LoadOffs, LoadSize, DepLI);
+-  if (Size == 0)
+-    return -1;
+-
+-  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8);
+-}
+-
+-int NewGVN::analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+-                                             MemIntrinsic *MI) {
+-  // If the mem operation is a non-constant size, we can't handle it.
+-  ConstantInt *SizeCst = dyn_cast(MI->getLength());
+-  if (!SizeCst)
+-    return -1;
+-  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+-
+-  // If this is memset, we just need to see if the offset is valid in the size
+-  // of the memset.
+-  if (MI->getIntrinsicID() == Intrinsic::memset)
+-    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+-                                          MemSizeInBits);
+-
+-  // If we have a memcpy/memmove, the only case we can handle is if this is a
+-  // copy from constant memory. In that case, we can read directly from the
+-  // constant memory.
+-  MemTransferInst *MTI = cast(MI);
+-
+-  Constant *Src = dyn_cast(MTI->getSource());
+-  if (!Src)
+-    return -1;
+-
+-  GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Src, *DL));
+-  if (!GV || !GV->isConstant())
+-    return -1;
+-
+-  // See if the access is within the bounds of the transfer.
+-  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+-                                              MemSizeInBits);
+-  if (Offset == -1)
+-    return Offset;
+-
+-  unsigned AS = Src->getType()->getPointerAddressSpace();
+-  // Otherwise, see if we can constant fold a load from the constant with the
+-  // offset applied as appropriate.
+-  Src =
+-      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+-  Constant *OffsetCst =
+-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+-                                       OffsetCst);
+-  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+-  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, *DL))
+-    return Offset;
+-  return -1;
+-}
+-
+ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
+                                                          const BasicBlock *B) {
+   StoreInst *SI = cast(I);
+-  const Expression *E =
+-      createStoreExpression(SI, MSSAWalker->getClobberingMemoryAccess(SI), B);
++  const Expression *E = createStoreExpression(SI, MSSA->getMemoryAccess(SI), B);
+   return E;
+ }
+
+-const Expression *
+-NewGVN::performSymbolicLoadCoercionFromPhi(Type *LoadType, Value *LoadPtr,
+-                                           LoadInst *LI, MemoryPhi *DefiningPhi,
+-                                           const BasicBlock *B) {
+-  if (!LoadPtr)
+-    return nullptr;
+-  AvailValInBlkVect ValuesPerBlock;
+-  for (auto &Op : DefiningPhi->operands()) {
+-    BasicBlock *IncomingBlock = DefiningPhi->getIncomingBlock(Op);
+-    // If the block is not reachable, it's undef
+-    if (!ReachableBlocks.count(IncomingBlock)) {
+-      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(IncomingBlock));
+-      continue;
+-    }
+-    // FIXME: We don't support phi over phi
+-    if (isa(Op))
+-      return nullptr;
+-    const MemoryDef *Def = cast(Op);
+-    Instruction *Inst = Def->getMemoryInst();
+-    if (!Inst)
+-      return nullptr;
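For the memcpy-from-constant-global case deleted above, the folding step amounts to reading the known initializer bytes directly. A minimal little-endian model (hypothetical helper with a fixed 32-bit load width; the real code constant-folds through a GEP instead):

#include <cstdint>
#include <cstring>

// Once the transfer source is known constant memory, a load at a valid
// Offset can be folded by copying the covered bytes out of the initializer.
static uint32_t foldLoadFromConstantBytes(const uint8_t *ConstSrc,
                                          unsigned Offset) {
  uint32_t V;
  std::memcpy(&V, ConstSrc + Offset, sizeof(V));
  return V;
}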
+-    // If the dependence is to a store that writes to a superset of the bits
+-    // read by the load, we can extract the bits we need for the load from the
+-    // stored value.
+-    if (StoreInst *DepSI = dyn_cast(Inst)) {
+-      int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI);
+-      if (Offset != -1) {
+-        ValuesPerBlock.push_back(AvailableValueInBlock::get(
+-            Def->getBlock(), DepSI->getValueOperand(), Offset));
+-        continue;
+-      }
+-    }
+-    // Check to see if we have something like this:
+-    //   load i32* P
+-    //   load i8* (P+1)
+-    // if we have this, replace the latter with an extraction from the former.
+-    if (LoadInst *DepLI = dyn_cast(Inst)) {
+-      // If this is a clobber and L is the first instruction in its block, then
+-      // we have the first instruction in the entry block.
+-      if (DepLI != LI) {
+-        int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI);
+-
+-        if (Offset != -1) {
+-          ValuesPerBlock.push_back(
+-              AvailableValueInBlock::getLoad(Def->getBlock(), DepLI, Offset));
+-          continue;
+-        }
+-      }
+-    }
+-    // If the clobbering value is a memset/memcpy/memmove, see if we can
+-    // forward a value on from it.
+-    if (MemIntrinsic *DepMI = dyn_cast(Inst)) {
+-      int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI);
+-      if (Offset != -1) {
+-        Value *PossibleConstant =
+-            getMemInstValueForLoad(DepMI, Offset, LoadType, LI, true);
+-        ValuesPerBlock.push_back(
+-            AvailableValueInBlock::getMI(Def->getBlock(), DepMI, Offset));
+-        continue;
+-      }
+-    }
+-    return nullptr;
+-  }
+-  assert(ValuesPerBlock.size() == DefiningPhi->getNumIncomingValues());
+-  return nullptr;
+-}
+-
+-const Expression *NewGVN::performSymbolicLoadCoercion(
+-    Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst,
+-    MemoryAccess *DefiningAccess, const BasicBlock *B) {
+-  assert((!LI || LI->isSimple()) && "Not a simple load");
+-
+-  if (StoreInst *DepSI = dyn_cast(DepInst)) {
+-    Value *LoadAddressLeader = lookupOperandLeader(LoadPtr, B).first;
+-    Value *StoreAddressLeader =
+-        lookupOperandLeader(DepSI->getPointerOperand(), B).first;
+-    Value *StoreVal = DepSI->getValueOperand();
+-    if (StoreVal->getType() == LoadType &&
+-        LoadAddressLeader == StoreAddressLeader) {
+-      return createVariableOrConstant(DepSI->getValueOperand(), B);
+-    } else {
+-      int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI);
+-      if (Offset >= 0)
+-        return createCoercibleLoadExpression(
+-            LoadType, LoadPtr, LI, DefiningAccess, (unsigned)Offset, DepSI, B);
+-    }
+-  } else if (LoadInst *DepLI = dyn_cast(DepInst)) {
+-    int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI);
+-    if (Offset >= 0)
+-      return createCoercibleLoadExpression(
+-          LoadType, LoadPtr, LI, DefiningAccess, (unsigned)Offset, DepLI, B);
+-  } else if (MemIntrinsic *DepMI = dyn_cast(DepInst)) {
+-    int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI);
+-    if (Offset >= 0)
+-      return createCoercibleLoadExpression(
+-          LoadType, LoadPtr, LI, DefiningAccess, (unsigned)Offset, DepMI, B);
+-  }
+-  // If this load really doesn't depend on anything, then we must be loading
+-  // an undef value. This can happen when loading for a fresh allocation with
+-  // no intervening stores, for example.
+-  else if (isa(DepInst) || isMallocLikeFn(DepInst, TLI))
+-    return createConstantExpression(UndefValue::get(LoadType), false);
+-
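The load i32* P / load i8* (P+1) example in the comment above comes down to shift-and-truncate once both loads are known to read the same stored bytes. A little-endian sketch with hypothetical fixed widths (not part of the patch):

#include <cstdint>

// Re-derive the narrow load from the wider one: shift the byte at Offset
// down to bit 0, then truncate (little-endian layout assumed).
static uint8_t extractByteFromWideLoad(uint32_t WideValue, unsigned Offset) {
  return (uint8_t)(WideValue >> (8 * Offset));
}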
+-  // If this load occurs right after a lifetime begin,
+-  // then the loaded value is undefined.
+-  else if (IntrinsicInst *II = dyn_cast(DepInst)) {
+-    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+-      return createConstantExpression(UndefValue::get(LoadType), false);
+-  }
+-  // If this load follows a calloc (which zero initializes memory),
+-  // then the loaded value is zero
+-  else if (isCallocLikeFn(DepInst, TLI)) {
+-    return createConstantExpression(Constant::getNullValue(LoadType), false);
+-  }
+-
+-  return nullptr;
+-}
+-
+ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+                                                         const BasicBlock *B) {
+   LoadInst *LI = cast(I);
+@@ -1238,10 +662,10 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+     return nullptr;
+
+   Value *LoadAddressLeader =
+-      lookupOperandLeader(LI->getPointerOperand(), B).first;
++      lookupOperandLeader(LI->getPointerOperand(), I, B);
+   // Load of undef is undef
+   if (isa(LoadAddressLeader))
+-    return createConstantExpression(UndefValue::get(LI->getType()), false);
++    return createConstantExpression(UndefValue::get(LI->getType()));
+
+   MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+
+@@ -1251,58 +675,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+     // If the defining instruction is not reachable, replace with
+     // undef
+     if (!ReachableBlocks.count(DefiningInst->getParent()))
+-      return createConstantExpression(UndefValue::get(LI->getType()), false);
+-    const Expression *CoercionResult =
+-        performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(),
+-                                    LI, DefiningInst, DefiningAccess, B);
+-    if (CoercionResult)
+-      return CoercionResult;
+-  } else if (MemoryPhi *MP = dyn_cast(DefiningAccess)) {
+-    const Expression *CoercionResult = performSymbolicLoadCoercionFromPhi(
+-        LI->getType(), LI->getPointerOperand(), LI, MP, B);
+-    if (CoercionResult)
+-      return CoercionResult;
+-  }
+-} else {
+-  BasicBlock *LoadBlock = LI->getParent();
+-  MemoryAccess *LoadAccess = MSSA->getMemoryAccess(LI);
+-  // Okay, so uh, we couldn't use the defining access to grab a value out
+-  // of it. See if we can reuse any of its uses by widening a load.
+-  for (const auto &U : DefiningAccess->users()) {
+-    MemoryAccess *MA = cast(U);
+-    if (MA == LoadAccess)
+-      continue;
+-    if (isa(MA))
+-      continue;
+-    Instruction *DefiningInst = cast(MA)->getMemoryInst();
+-    if (LoadInst *DepLI = dyn_cast(DefiningInst)) {
+-      BasicBlock *DefiningBlock = DefiningInst->getParent();
+-      if (!DT->dominates(DefiningBlock, LoadBlock))
+-        continue;
+-
+-      // Make sure the dependent load comes before the load we are trying
+-      // to coerce if they are in the same block
+-      if (InstrDFS[DepLI] >= InstrDFS[LI])
+-        continue;
+-
+-      // Now, first make sure they really aren't identical loads, and if
+-      // they aren't see if the dominating one can be coerced.
+-      // We don't want to mark identical loads coercible, since coercible
+-      // loads don't value number with normal loads.
+-      Value *DepAddressLeader =
+-          lookupOperandLeader(DepLI->getPointerOperand(), B).first;
+-
+-      if (LI->getType() != DepLI->getType() ||
+-          DepAddressLeader != LoadAddressLeader ||
+-          LI->isSimple() != DepLI->isSimple()) {
+-        int Offset = analyzeLoadFromClobberingLoad(
+-            LI->getType(), LI->getPointerOperand(), DepLI);
+-        if (Offset >= 0)
+-          return createCoercibleLoadExpression(
+-              LI->getType(), LI->getPointerOperand(), LI, DefiningAccess,
+-              (unsigned)Offset, DepLI, B);
+-      }
+-    }
++      return createConstantExpression(UndefValue::get(LI->getType()));
+     }
+   }
+
+@@ -1335,7 +708,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
+                  << "\n");
+     E->deallocateOperands(ArgRecycler);
+     ExpressionAllocator.Deallocate(E);
+-    return createConstantExpression(UndefValue::get(I->getType()), false);
++    return createConstantExpression(UndefValue::get(I->getType()));
+   }
+
+   Value *AllSameValue = E->getOperand(0);
+@@ -1369,8 +742,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
+     E->deallocateOperands(ArgRecycler);
+     ExpressionAllocator.Deallocate(E);
+     if (Constant *C = dyn_cast(AllSameValue))
+-      return createConstantExpression(C, E->usedEquivalence());
+-    return createVariableExpression(AllSameValue, E->usedEquivalence());
++      return createConstantExpression(C);
++    return createVariableExpression(AllSameValue);
+   }
+   return E;
+ }
+@@ -1421,12 +794,11 @@ NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
+ /// before value numbering
+ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
+                                                     const BasicBlock *B) {
+-  // XXX: Use forward propagation by substituting DefiningExpr
+   const Expression *E = NULL;
+   if (Constant *C = dyn_cast(V))
+-    E = createConstantExpression(C, false);
++    E = createConstantExpression(C);
+   else if (isa(V) || isa(V)) {
+-    E = createVariableExpression(V, false);
++    E = createVariableExpression(V);
+   } else {
+     // TODO: memory intrinsics
+     // TODO: Some day, we should do the forward propagation and reassociation
+@@ -1509,14 +881,7 @@ bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) {
+   // only reachable from Src, in practice it is pointless since at the time
+   // GVN runs all such loops have preheaders, which means that Dst will have
+   // been changed to have only one predecessor, namely Src.
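The single-predecessor test kept below can be stated over any predecessor map. A small sketch with a hypothetical adjacency structure (not LLVM types):

#include <map>
#include <vector>

// The edge Src -> Dst is the only way into Dst exactly when Dst has a
// single predecessor and that predecessor is Src.
static bool isOnlyReachableViaEdge(
    const std::map<int, std::vector<int>> &Preds, int Src, int Dst) {
  auto It = Preds.find(Dst);
  return It != Preds.end() && It->second.size() == 1 &&
         It->second.front() == Src;
}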
+-#if 0
+-  BasicBlock *EdgeEnd = const_cast(E.getEnd());
+-  if (PredCache.size(EdgeEnd) != 1)
+-    return false;
+-  const BasicBlock *Pred = PredCache.get(EdgeEnd)[0];
+-#else
+   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+-#endif
+   const BasicBlock *Src = E.getStart();
+   assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+   (void)Src;
+@@ -1600,31 +965,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
+   DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID
+                << "\n");
+
+-  if (E && isa(E)) {
+-    const CoercibleLoadExpression *L = cast(E);
+-    VClass->CoercibleMembers.erase(V);
+-    EClass->CoercibleMembers.insert(V);
+-    CoercionInfo.insert(
+-        std::make_pair(V, std::make_pair(L->getOffset(), L->getSrc())));
+-  } else {
+-    VClass->Members.erase(V);
+-    EClass->Members.insert(V);
+-  }
+-
++  VClass->Members.erase(V);
++  EClass->Members.insert(V);
+   ValueToClass[V] = EClass;
+   // See if we destroyed the class or need to swap leaders
+-  if ((VClass->Members.empty() && VClass->CoercibleMembers.empty()) &&
+-      VClass != InitialClass) {
++  if (VClass->Members.empty() && VClass != InitialClass) {
+     if (VClass->DefiningExpr) {
+       VClass->Dead = true;
+-
+       DEBUG(dbgs() << "Erasing expression " << *E << " from table\n");
+-      // bool wasE = *E == *VClass->expression;
+       ExpressionToClass.erase(VClass->DefiningExpr);
+-      // if (wasE)
+-      //   lookupMap->insert({E, EClass});
+     }
+-    // delete VClass;
+   } else if (VClass->RepLeader == V) {
+     /// XXX: Check this. When the leader changes, the value numbering of
+     /// everything may change, so we need to reprocess.
+@@ -1634,11 +984,6 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
+       TouchedInstructions.set(InstrDFS[I]);
+       ChangedValues.insert(M);
+     }
+-    for (auto EM : VClass->CoercibleMembers) {
+-      if (Instruction *I = dyn_cast(EM))
+-        TouchedInstructions.set(InstrDFS[I]);
+-      ChangedValues.insert(EM);
+-    }
+     }
+   }
+   markUsersTouched(V);
+@@ -1653,12 +998,10 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
+   if (ReachableEdges.insert({From, To}).second) {
+     // If this block wasn't reachable before, all instructions are touched
+     if (ReachableBlocks.insert(To).second) {
+-      SingleReachableIncoming.insert({To, From});
+       DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n");
+       const auto &InstRange = BlockInstRange.lookup(To);
+       TouchedInstructions.set(InstRange.first, InstRange.second);
+     } else {
+-      SingleReachableIncoming.erase(To);
+       DEBUG(dbgs() << "Block " << getBlockName(To)
+                    << " was reachable, but new edge {" << getBlockName(From)
+                    << "," << getBlockName(To) << "} to it found\n");
+@@ -1672,8 +1015,6 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
+         TouchedInstructions.set(InstrDFS[&*BI]);
+         ++BI;
+       }
+-      // Propagate the change downstream.
+-      propagateChangeInEdge(To);
+     }
+   }
+ }
+@@ -1682,43 +1023,11 @@
+ // switch, cmp, or whatever) and a block, see if we know some constant
+ // value for it already
+ Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const {
+-  auto Result = lookupOperandLeader(Cond, B);
+-  if (isa(Result.first))
+-    return Result.first;
+-
++  auto Result = lookupOperandLeader(Cond, nullptr, B);
++  if (isa(Result))
++    return Result;
+   return nullptr;
+ }
+-Value *NewGVN::inferValueAtBlock(Value *V, const BasicBlock *B) const {
+-  V = lookupOperandLeader(V, B).first;
+-  if (isa(V))
+-    return V;
+-  const BasicBlock *FirstBlock = B;
+-  const BasicBlock *LastBlock = nullptr;
+-  while (!isa(V) && B != LastBlock) {
+-    B = FirstBlock;
+-    while (B != LastBlock) {
+-      const BasicBlock *SingleIncomingBlock = SingleReachableIncoming.lookup(B);
+-      if (!SingleIncomingBlock) {
+-        const auto DomNode = DT->getNode(B)->getIDom();
+-        if (!DomNode) {
+-          B = LastBlock;
+-          break;
+-        }
+-
+-        B = DomNode->getBlock();
+-        continue;
+-      }
+-      const auto EPI = EdgePredicates.find({SingleIncomingBlock, B});
+-      if (EPI != EdgePredicates.end()) {
+-        for (const auto &Predicate : EPI->second)
+-          if (Predicate.First == V || Predicate.Second == V)
+-            DEBUG(dbgs() << "Edge Predicate found for value " << *V << " \n");
+-      }
+-      B = SingleIncomingBlock;
+-    }
+-  }
+-  return V;
+-}
+
+ // processOutgoingEdges - Process the outgoing edges of a block for
+ // reachability.
+@@ -1738,37 +1047,10 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+     } else if (isa(Cond)) {
+       CondEvaluated = Cond;
+     }
+-  } else {
+-    InvolvedInEquivalence.set(InstrDFS[TI]);
+   }
+   ConstantInt *CI;
+   BasicBlock *TrueSucc = BR->getSuccessor(0);
+   BasicBlock *FalseSucc = BR->getSuccessor(1);
+-  // Start with the true predicate
+-  if (CmpInst *CI = dyn_cast(Cond)) {
+-    CmpInst::Predicate Predicate = CI->getPredicate();
+-    Value *Op0 = CI->getOperand(0);
+-    Value *Op1 = CI->getOperand(1);
+-    EdgePredicate EP = {Op0, Predicate, Op1,
+-                        ConstantInt::getTrue(B->getContext())};
+-    // Make a consistent order for predicates
+-    if (Op0 > Op1) {
+-      std::swap(EP.First, EP.Second);
+-      EP.Op = CmpInst::getSwappedPredicate(EP.Op);
+-    }
+-
+-    // XXX: Infer value of predicate
+-    // XXX: Recompute predicates properly
+-    const auto EPT = EdgePredicates[{B, TrueSucc}].insert(EP);
+-    if (!EPT.second)
+-      propagateChangeInEdge(TrueSucc);
+-
+-    EP.Val = ConstantInt::getFalse(B->getContext());
+-    const auto EPF = EdgePredicates[{B, FalseSucc}].insert(EP);
+-    if (!EPF.second)
+-      propagateChangeInEdge(FalseSucc);
+-  }
+-
+   if (CondEvaluated && (CI = dyn_cast(CondEvaluated))) {
+     if (CI->isOne()) {
+       DEBUG(dbgs() << "Condition for Terminator " << *TI
+@@ -1780,13 +1062,6 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+       updateReachableEdge(B, FalseSucc);
+     }
+   } else {
+-    /* XXX:Predicates
+-    propagateEquality(Cond, ConstantInt::getTrue(TrueSucc->getContext()),
+-                      false, {B, TrueSucc});
+-
+-    propagateEquality(Cond, ConstantInt::getFalse(FalseSucc->getContext()),
+-                      false, {B, FalseSucc});
+-    */
+     updateReachableEdge(B, TrueSucc);
+     updateReachableEdge(B, FalseSucc);
+   }
+@@ -1802,7 +1077,6 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+   Value *CondEvaluated = findConditionEquivalence(SwitchCond, B);
+   // See if we were able to turn this switch statement into a constant
+   if (CondEvaluated && isa(CondEvaluated)) {
+-    InvolvedInEquivalence.set(InstrDFS[TI]);
+     ConstantInt *CondVal = cast(CondEvaluated);
+     // We should be able to get case value for this
+     auto CaseVal = SI->findCaseValue(CondVal);
+@@ -1827,27 +1101,12 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+       if (Block == TargetBlock)
+         MultipleEdgesOneReachable = true;
+     }
+-    const BasicBlockEdge E(B, TargetBlock);
+-    /* XXX:Predicates
+-    propagateEquality(SwitchCond, CaseVal.getCaseValue(),
+-                      MultipleEdgesOneReachable, E);*/
+   } else {
+     for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+       BasicBlock *TargetBlock = SI->getSuccessor(i);
+       ++SwitchEdges[TargetBlock];
+       updateReachableEdge(B, TargetBlock);
+     }
+-    /*XXX:Predicates
+-    // Regardless of answers, propagate equalities for case values
+-    for (auto i = SI->case_begin(), e = SI->case_end(); i != e; ++i) {
+-      BasicBlock *TargetBlock = i.getCaseSuccessor();
+-      if (SwitchEdges.lookup(TargetBlock) == 1) {
+-        const BasicBlockEdge E(B, TargetBlock);
+-        propagateEquality(SwitchCond, i.getCaseValue(),
+-                          MultipleEdgesOneReachable, E);
+-      }
+-    }
+-    */
+   }
+ } else {
+   // Otherwise this is either unconditional, or a type we have no
+@@ -1859,98 +1118,17 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+   }
+ }
+
+-// Figure out and cache the dominated instruction range for this block. We do
+-// this by doing a depth first search, and figuring out the min and the max of
+-// the dominated instruction range.
+-// On the way up, we cache the ranges for all the child blocks.
+-// This is essentially an incremental DFS
+-
+-const std::pair
+-NewGVN::calculateDominatedInstRange(const DomTreeNode *DTN) {
+-
+-  // First see if we have it, if not, we need to recalculate
+-  const auto &DominatedRange = DominatedInstRange.find(DTN);
+-  if (DominatedRange != DominatedInstRange.end()) {
+-    return DominatedRange->second;
+-  }
+-  // Recalculate
+-  SmallVector, 32>
+-      WorkStack;
+-  WorkStack.emplace_back(DTN, DTN->begin());
+-  unsigned MaxSeen = 0;
+-  while (!WorkStack.empty()) {
+-    const auto &Back = WorkStack.back();
+-    const DomTreeNode *Node = Back.first;
+-    auto ChildIt = Back.second;
+-    const auto &Result = BlockInstRange.lookup(Node->getBlock());
+-    MaxSeen = std::max(MaxSeen, Result.second);
+-    // If we visited all of the children of this node, "recurse" back up the
+-    // stack setting the ranges
+-    if (ChildIt == Node->end()) {
+-      const auto &Result = BlockInstRange.lookup(Node->getBlock());
+-      DominatedInstRange[DTN] = {Result.first, MaxSeen};
+-      WorkStack.pop_back();
+-      if (WorkStack.empty())
+-        return {Result.first, MaxSeen};
+-    } else {
+-      while (ChildIt != Node->end()) {
+-        // Otherwise, recursively visit this child.
+-        const DomTreeNode *Child = *ChildIt;
+-        ++WorkStack.back().second;
+-        const auto &LookupResult = DominatedInstRange.find(Child);
+-        if (LookupResult == DominatedInstRange.end()) {
+-          WorkStack.emplace_back(Child, Child->begin());
+-          break;
+-        } else {
+-          // We already have calculated this subtree
+-          MaxSeen = std::max(MaxSeen, LookupResult->second.second);
+-          ++ChildIt;
+-        }
+-      }
+-    }
+-  }
+-  llvm_unreachable("Should have returned a value already");
+-}
+-
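The deleted calculateDominatedInstRange is, in essence, a memoized subtree-maximum query over the dominator tree. A compact recursive model (hypothetical Node type; the original uses an explicit worklist to bound stack depth):

#include <algorithm>
#include <unordered_map>
#include <vector>

struct Node {
  unsigned RangeEnd;            // end of this block's instruction range
  std::vector<Node *> Children; // dominator-tree children
};

// Maximum RangeEnd over N's subtree, cached so repeated queries are cheap.
static unsigned subtreeMax(Node *N,
                           std::unordered_map<Node *, unsigned> &Cache) {
  auto It = Cache.find(N);
  if (It != Cache.end())
    return It->second;
  unsigned Max = N->RangeEnd;
  for (Node *C : N->Children)
    Max = std::max(Max, subtreeMax(C, Cache));
  Cache[N] = Max;
  return Max;
}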
+-/// propagateChangeInEdge - Propagate a change in edge reachability
+-// When we discover a new edge to an existing reachable block, that
+-// can affect the value of instructions that used equivalences.
+-//
+-void NewGVN::propagateChangeInEdge(BasicBlock *Dest) {
+-  // Unlike the published algorithm, we don't touch blocks because we aren't
+-  // recomputing predicates.
+-  // Instead, we touch all the instructions we dominate that were used in an
+-  // equivalence, in case they changed.
+-  // We incrementally compute the dominated instruction ranges, because doing it
+-  // up front requires a DFS walk of the dominator tree that is a complete waste
+-  // of time if no equivalences ever get seen.
+-  // Note that we expect phi nodes in the Dest block will have already been
+-  // touched by our caller.
+-  DomTreeNode *DTN = DT->getNode(Dest);
+-
+-  // Note that this is an overshoot, because the inst ranges are calculated in
+-  // RPO order, not dominator tree order.
+-  const std::pair Result = calculateDominatedInstRange(DTN);
+-
+-  // Touch all the downstream dominated instructions that used equivalences.
+-  for (int InstrNum = InvolvedInEquivalence.find_next(Result.first - 1);
+-       InstrNum != -1 && (Result.second - InstrNum > 0);
+-       InstrNum = InvolvedInEquivalence.find_next(InstrNum)) {
+-    // TODO: We could do confluence block checks here.
+-    TouchedInstructions.set(InstrNum);
+-  }
+-}
+-
++// The algorithm initially places the values of the routine in the INITIAL
++// congruence class. The leader of INITIAL is the undetermined value `TOP`.
++// When the algorithm has finished, values still in INITIAL are unreachable.
+ void NewGVN::initializeCongruenceClasses(Function &F) {
+   // FIXME: Now I can't remember why this is 2.
+   NextCongruenceNum = 2;
+   // Initialize all other instructions to be in INITIAL class
+   CongruenceClass::MemberSet InitialValues;
+-  for (auto &B : F) {
+-    for (auto &I : B) {
++  for (auto &B : F)
++    for (auto &I : B)
+       InitialValues.insert(&I);
+-    }
+-  }
+
+   InitialClass = createCongruenceClass(NULL, NULL);
+   for (auto L : InitialValues)
+@@ -1964,22 +1142,21 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
+ }
+
+ void NewGVN::cleanupTables() {
+-
+-  ValueToClass.clear();
++#ifndef NDEBUG
+   for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) {
+     DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has "
+                  << CongruenceClasses[i]->Members.size() << " members\n");
+   }
++#endif
+
++  ValueToClass.clear();
+   ArgRecycler.clear(ExpressionAllocator);
+   ExpressionAllocator.Reset();
+   CongruenceClasses.clear();
+   ExpressionToClass.clear();
+   ValueToExpression.clear();
+-  SingleReachableIncoming.clear();
+   ReachableBlocks.clear();
+   ReachableEdges.clear();
+-  EdgePredicates.clear();
+   ProcessedCount.clear();
+   DFSDomMap.clear();
+   InstrDFS.clear();
+@@ -1988,11 +1165,7 @@ void NewGVN::cleanupTables() {
+   DFSToInstr.clear();
+   BlockInstRange.clear();
+   TouchedInstructions.clear();
+-  InvolvedInEquivalence.clear();
+-  CoercionInfo.clear();
+-  CoercionForwarding.clear();
+   DominatedInstRange.clear();
+-  PredCache.clear();
+ }
+
+ std::pair NewGVN::assignDFSNumbers(BasicBlock *B,
+@@ -2009,30 +1182,23 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B,
+   return std::make_pair(Start, End);
+ }
+
+-void NewGVN::topoVisitCongruenceClass(
+-    CongruenceClass *CC, SmallDenseMap &UsedCount,
+-    SmallPtrSetImpl &Visited) {
+-  Visited.insert(CC);
+-  // Visit the classes of the values of the operands of the leader set
+-  for (auto Member : CC->Members) {
+-    if (User *U = dyn_cast(Member)) {
+-      for (auto &I : U->operands()) {
+-        CongruenceClass *OperandCC = ValueToClass.lookup(I);
+-        if (OperandCC) {
+-          UsedCount[OperandCC] += 1;
+-          if (!Visited.count(OperandCC))
+-            topoVisitCongruenceClass(OperandCC, UsedCount, Visited);
+-        }
+-      }
+-    }
++void NewGVN::updateProcessedCount(Value *V) {
++#ifndef NDEBUG
++  if (ProcessedCount.count(V) == 0) {
++    ProcessedCount.insert({V, 1});
++  } else {
++    ProcessedCount[V] += 1;
++    assert(ProcessedCount[V] < 100 &&
++           "Seem to have processed the same Value a lot\n");
+   }
++#endif
+ }
+
+ /// runOnFunction - This is the main transformation entry point for a
+ /// function.
+ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+-                    TargetLibraryInfo *TLI, AliasAnalysis *AA,
+-                    MemorySSA *MSSA) {
++                    TargetLibraryInfo *TLI, AliasAnalysis *AA,
++                    MemorySSA *MSSA) {
+   bool Changed = false;
+   this->DT = DT;
+   this->AC = AC;
+@@ -2042,12 +1208,9 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   DL = &F.getParent()->getDataLayout();
+   MSSAWalker = MSSA->getWalker();
+
+-  // SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(AA, DT));
+-
+-  unsigned ICount = 0;
+   // Count number of instructions for sizing of hash tables, and come
+-  // up with a global dfs numbering for instructions
+-
++  // up with a global dfs numbering for instructions.
++  unsigned ICount = 0;
+   SmallPtrSet VisitedBlocks;
+
+   // Note: We want RPO traversal of the blocks, which is not quite the same as
+@@ -2056,7 +1219,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   // If we visit in the wrong order, we will end up performing N times as many
+   // iterations.
+   ReversePostOrderTraversal RPOT(&F);
+-  for (BasicBlock *B : RPOT) {
++  for (auto &B : RPOT) {
+     VisitedBlocks.insert(B);
+     const auto &BlockRange = assignDFSNumbers(B, ICount);
+     BlockInstRange.insert({B, BlockRange});
+@@ -2066,7 +1229,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   // Handle forward unreachable blocks and figure out which blocks
+   // have single preds
+
+-  for (BasicBlock &B : F) {
++  for (auto &B : F) {
+     // Assign numbers to unreachable blocks
+     if (!VisitedBlocks.count(&B)) {
+       const auto &BlockRange = assignDFSNumbers(&B, ICount);
+@@ -2076,7 +1239,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   }
+
+   TouchedInstructions.resize(ICount + 1);
+-  InvolvedInEquivalence.resize(ICount + 1);
+   DominatedInstRange.reserve(F.size());
+   // Ensure we don't end up resizing the expressionToClass map, as
+   // that can be quite expensive. At most, we have one expression per
+@@ -2112,17 +1274,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+                      << " because it is unreachable\n");
+         continue;
+       }
+-      //#ifndef NDEBUG
+-      if (ProcessedCount.count(CurrBlock) == 0) {
+-        ProcessedCount.insert({CurrBlock, 1});
+-      } else {
+-        ProcessedCount[CurrBlock] += 1;
+-        assert(ProcessedCount[CurrBlock] < 100 &&
+-               "Seem to have processed the same block a lot\n");
+-        if (ProcessedCount[CurrBlock] >= 100)
+-          report_fatal_error("Processed block too many times");
+-      }
+-      //#endif
++      updateProcessedCount(CurrBlock);
+     }
+     DEBUG(dbgs() << "Processing instruction " << *I << "\n");
+     if (I->use_empty() && !I->getType()->isVoidTy()) {
+@@ -2132,23 +1284,12 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+       TouchedInstructions.reset(InstrNum);
+       continue;
+     }
++    updateProcessedCount(I);
+
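The new updateProcessedCount helper factors out the inline NDEBUG counting blocks this hunk deletes. The shape of that guard, as a generic sketch (hypothetical name, standard containers in place of LLVM's):

#include <cassert>
#include <unordered_map>

// Debug-only convergence check: count how often the fixpoint loop revisits
// the same item and assert if it looks like iteration stopped converging.
template <typename T>
void noteProcessed(std::unordered_map<T, unsigned> &Counts, const T &V) {
#ifndef NDEBUG
  ++Counts[V];
  assert(Counts[V] < 100 && "Seem to have processed the same value a lot");
#else
  (void)Counts;
  (void)V;
#endif
}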
+-// This is done in case something eliminates the instruction
+-// along the way.
+-
+-#ifndef NDEBUG
+-    if (ProcessedCount.count(I) == 0) {
+-      ProcessedCount.insert({I, 1});
+-    } else {
+-      ProcessedCount[I] += 1;
+-      assert(ProcessedCount[I] < 100 &&
+-             "Seem to have processed the same instruction a lot");
+-    }
+-#endif
++    // This is done in case something eliminates the instruction
++    // along the way.
+     if (!I->isTerminator()) {
+       const Expression *Symbolized = performSymbolicEvaluation(I, CurrBlock);
+-      if (Symbolized && Symbolized->usedEquivalence())
+-        InvolvedInEquivalence.set(InstrDFS[I]);
+       performCongruenceFinding(I, Symbolized);
+     } else {
+       processOutgoingEdges(dyn_cast(I), CurrBlock);
+@@ -2161,53 +1302,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+
+   Changed |= eliminateInstructions(F);
+
+-// The ideal ordering for processing is not quite topological ordering,
+-// because there are multiple roots. It is essentially "group every vertex
+-// that depends on a given vertex together after that vertex", which is not
+-// the same. It is, in fact, an NP-complete problem, and given that the graph
+-// may be cyclic anyway, we order the congruence classes by how many things
+-// depend on them. This is a good approximation, and will cut down the number
+-// of iterations.
+-#if 0
+-  SmallDenseMap UsedCount;
+-  SmallPtrSet VisitedClasses;
+-
+-  for (int i = CongruenceClasses.size() - 1; i >= 0; --i) {
+-    CongruenceClass *CC = CongruenceClasses[i];
+-    if (CC == InitialClass || CC->dead || VisitedClasses.count(CC))
+-      continue;
+-    topoVisitCongruenceClass(CC, UsedCount, VisitedClasses);
+-  }
+-  SmallVector Worklist;
+-  for (auto &CC : CongruenceClasses)
+-    Worklist.push_back(CC);
+-  // std::sort(Worklist.begin(), Worklist.end(),
+-  //           [&UsedCount](CongruenceClass *&A, CongruenceClass *&B) {
+-  //             return UsedCount[A] > UsedCount[B];
+-  //           });
+-
+-  bool PREChanged = true;
+-  while (PREChanged) {
+-    PREChanged = false;
+-#if 1
+-    // FIXME: Handle added congruence classes
+-    if (Worklist.size() != CongruenceClasses.size())
+-      Worklist.insert(Worklist.end(),
+-                      CongruenceClasses.begin() + (Worklist.size() - 1),
+-                      CongruenceClasses.end());
+-#endif
+-    for (auto CC : Worklist) {
+-      if (CC == InitialClass || CC->dead)
+-        continue;
+-
+-      PREChanged |= performPREOnClass(CC);
+-    }
+-  }
+-
+-  PREValueForwarding.clear();
+-
+-  Changed |= PREChanged;
+-#endif
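The disabled block above orders congruence classes by how many other classes use them, since an exact definitions-before-uses ordering is NP-complete on a cyclic graph. A sketch of that heuristic (hypothetical helper over plain ints, not part of the patch); the deletion loop then follows below:

#include <algorithm>
#include <unordered_map>
#include <vector>

// Approximate "process definitions before uses" on a possibly-cyclic
// dependence graph by visiting the most-depended-upon classes first.
static std::vector<int>
orderByUseCount(const std::unordered_map<int, unsigned> &UseCount,
                std::vector<int> Classes) {
  std::stable_sort(Classes.begin(), Classes.end(), [&](int A, int B) {
    unsigned CA = UseCount.count(A) ? UseCount.at(A) : 0;
    unsigned CB = UseCount.count(B) ? UseCount.at(B) : 0;
    return CA > CB; // most-used first
  });
  return Classes;
}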
+   // Delete all instructions marked for deletion.
+   for (Instruction *ToErase : InstructionsToErase) {
+     if (!ToErase->use_empty())
+@@ -2241,7 +1335,8 @@ bool NewGVN::runOnFunction(Function &F) {
+       &getAnalysis().getMSSA());
+ }
+
+-PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager &AM) {
++PreservedAnalyses NewGVNPass::run(Function &F,
++                                  AnalysisManager &AM) {
+   NewGVN Impl;
+
+   // Apparently the order in which we get these results matter for
+@@ -2280,13 +1375,11 @@ struct NewGVN::ValueDFS {
+   int DFSIn;
+   int DFSOut;
+   int LocalNum;
+-  bool Coercible;
+   // Only one of these will be set
+   Value *Val;
+   Use *U;
+   ValueDFS()
+-      : DFSIn(0), DFSOut(0), LocalNum(0), Coercible(false), Val(nullptr),
+-        U(nullptr) {}
++      : DFSIn(0), DFSOut(0), LocalNum(0), Val(nullptr), U(nullptr) {}
+
+   bool operator<(const ValueDFS &other) const {
+     // It's not enough that any given field be less than - we have sets
+@@ -2309,8 +1402,6 @@ struct NewGVN::ValueDFS {
+     if (LocalNum < other.LocalNum)
+       return true;
+     else if (LocalNum == other.LocalNum) {
+-      if (!!Coercible < !!other.Coercible)
+-        return true;
+       if (Val < other.Val)
+         return true;
+       if (U < other.U)
+         return true;
+@@ -2323,8 +1414,7 @@ struct NewGVN::ValueDFS {
+ };
+
+ void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+-                                      std::vector &DFSOrderedSet,
+-                                      bool Coercible) {
++                                      std::vector &DFSOrderedSet) {
+   for (auto D : Dense) {
+     // First add the value
+     BasicBlock *BB = getBlockForValue(D);
+@@ -2338,7 +1428,6 @@ void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+     VD.DFSIn = DFSPair.first;
+     VD.DFSOut = DFSPair.second;
+     VD.Val = D;
+-    VD.Coercible = Coercible;
+     // If it's an instruction, use the real local dfs number,
+     if (Instruction *I = dyn_cast(D))
+       VD.LocalNum = InstrDFS[I];
+@@ -2351,7 +1440,6 @@ void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+     for (auto &U : D->uses()) {
+       if (Instruction *I = dyn_cast(U.getUser())) {
+         ValueDFS VD;
+-        VD.Coercible = Coercible;
+         // Put the phi node uses in the incoming block
+         BasicBlock *IBlock;
+         if (PHINode *P = dyn_cast(I)) {
+@@ -2426,30 +1514,11 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
+     if (!Inst.use_empty())
+       Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+     if (isa(Inst))
+-        continue;
++      continue;
+
+     Inst.eraseFromParent();
+     ++NumGVNInstrDeleted;
+   }
+-#if 0
+-  // Delete the instructions backwards, as it has a reduced likelihood of having
+-  // to update as many def-use and use-def chains.
+-  Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+-  while (EndInst->getIterator() != BB->begin()) {
+-    // Delete the next to last instruction.
+-    BasicBlock::iterator I = EndInst->getIterator();
+-    Instruction *Inst = &*--I;
+-    if (!Inst->use_empty())
+-      Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+-    if (isa(Inst)) {
+-      EndInst = Inst;
+-      continue;
+-    }
+-    Inst->eraseFromParent();
+-    BB->getInstList().erase(Inst);
+-    ++NumGVNInstrDeleted;
+-  }
+-#endif
+ }
+
+ void NewGVN::markInstructionForDeletion(Instruction *I) {
+@@ -2504,372 +1573,6 @@ private:
+ };
+ }
+
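ValueDFS::operator< above hand-rolls a lexicographic comparison. The intended ordering is what std::tie expresses directly; a sketch with the pointer fields reduced to opaque keys (hypothetical type, not part of the patch):

#include <tuple>

struct ValueDFSKey {
  int DFSIn, DFSOut, LocalNum;
  const void *Val, *U;
  // Lexicographic: DFSIn, then DFSOut, then LocalNum, then the pointers.
  bool operator<(const ValueDFSKey &O) const {
    return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
           std::tie(O.DFSIn, O.DFSOut, O.LocalNum, O.Val, O.U);
  }
};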
+-/// CanCoerceMustAliasedValueToLoad - Return true if
+-/// CoerceAvailableValueToLoadType will succeed.
+-bool NewGVN::canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy) {
+-
+-  // If the loaded or stored value is a first class array or struct, don't
+-  // try to transform them. We need to be able to bitcast to integer.
+-  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+-      StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+-    return false;
+-
+-  // The store has to be at least as big as the load.
+-  if (DL->getTypeSizeInBits(StoredVal->getType()) <
+-      DL->getTypeSizeInBits(LoadTy))
+-    return false;
+-
+-  return true;
+-}
+-
+-/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory,
+-/// and then a load from a must-aliased pointer of a different type, try to
+-/// coerce the stored value. LoadedTy is the type of the load we want to
+-/// replace and InsertPt is the place to insert new instructions.
+-///
+-/// If we can't do it, return null.
+-Value *NewGVN::coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+-                                              Instruction *InsertPt) {
+-
+-  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy))
+-    return 0;
+-
+-  // If this is already the right type, just return it.
+-  Type *StoredValTy = StoredVal->getType();
+-
+-  uint64_t StoreSize = DL->getTypeSizeInBits(StoredValTy);
+-  uint64_t LoadSize = DL->getTypeSizeInBits(LoadedTy);
+-
+-  // If the store and reload are the same size, we can always reuse it.
+-  if (StoreSize == LoadSize) {
+-    // Pointer to Pointer -> use bitcast.
+-    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
+-      Instruction *I = new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+-      handleNewInstruction(I);
+-      return I;
+-    }
+-
+-    // Convert source pointers to integers, which can be bitcast.
+-    if (StoredValTy->isPointerTy()) {
+-      StoredValTy = DL->getIntPtrType(StoredValTy->getContext());
+-      Instruction *I = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+-      StoredVal = I;
+-      handleNewInstruction(I);
+-    }
+-
+-    Type *TypeToCastTo = LoadedTy;
+-    if (TypeToCastTo->isPointerTy())
+-      TypeToCastTo = DL->getIntPtrType(StoredValTy->getContext());
+-
+-    if (StoredValTy != TypeToCastTo) {
+-      Instruction *I = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+-      StoredVal = I;
+-      handleNewInstruction(I);
+-    }
+-
+-    // Cast to pointer if the load needs a pointer type.
+-    if (LoadedTy->isPointerTy()) {
+-      Instruction *I = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+-      StoredVal = I;
+-      handleNewInstruction(I);
+-    }
+-    return StoredVal;
+-  }
+-
+-  // If the loaded value is smaller than the available value, then we can
+-  // extract out a piece from it. If the available value is too small, then
+-  // we can't do anything.
+-  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+-
+-  // Convert source pointers to integers, which can be manipulated.
+-  if (StoredValTy->isPointerTy()) {
+-    StoredValTy = DL->getIntPtrType(StoredValTy->getContext());
+-    Instruction *I = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+-    StoredVal = I;
+-    handleNewInstruction(I);
+-  }
+-
+-  // Convert vectors and fp to integer, which can be manipulated.
+-  if (!StoredValTy->isIntegerTy()) {
+-    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+-    Instruction *I = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+-    StoredVal = I;
+-    handleNewInstruction(I);
+-  }
+-
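The narrowing path being deleted boils down to plain integer arithmetic once pointers and vectors have been bitcast away. A sketch with hypothetical bit widths (not part of the patch):

#include <cstdint>

// Model of the shift-then-truncate narrowing below: big-endian targets keep
// the interesting bytes in the high bits, so shift them down first.
static uint64_t narrowStoredValue(uint64_t Stored, unsigned StoreBits,
                                  unsigned LoadBits, bool BigEndian) {
  if (BigEndian)
    Stored >>= (StoreBits - LoadBits);
  uint64_t Mask = (LoadBits >= 64) ? ~0ULL : ((1ULL << LoadBits) - 1);
  return Stored & Mask;
}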
+-  // If this is a big-endian system, we need to shift the value down to the
+-  // low bits so that a truncate will work.
+-  if (DL->isBigEndian()) {
+-    Constant *Val =
+-        ConstantInt::get(StoredVal->getType(), StoreSize - LoadSize);
+-    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+-    if (Instruction *I = dyn_cast(StoredVal))
+-      handleNewInstruction(I);
+-  }
+-
+-  // Truncate the integer to the right size now.
+-  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+-  Instruction *I = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+-  StoredVal = I;
+-  handleNewInstruction(I);
+-
+-  if (LoadedTy == NewIntTy)
+-    return StoredVal;
+-
+-  // If the result is a pointer, inttoptr.
+-  if (LoadedTy->isPointerTy()) {
+-    I = new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+-    handleNewInstruction(I);
+-    return I;
+-  }
+-
+-  // Otherwise, bitcast.
+-  I = new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+-  handleNewInstruction(I);
+-  return I;
+-}
+-
+-/// GetStoreValueForLoad - This function is called when we have a
+-/// memdep query of a load that ends up being a clobbering store. This means
+-/// that the store provides bits used by the load but the pointers don't
+-/// mustalias. Check this case to see if there is anything more we can do
+-/// before we give up.
+-Value *NewGVN::getStoreValueForLoad(Value *SrcVal, unsigned Offset,
+-                                    Type *LoadTy, Instruction *InsertPt) {
+-
+-  LLVMContext &Ctx = SrcVal->getType()->getContext();
+-
+-  uint64_t StoreSize = (DL->getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+-  uint64_t LoadSize = (DL->getTypeSizeInBits(LoadTy) + 7) / 8;
+-
+-  IRBuilder<> Builder(InsertPt);
+-
+-  // Compute which bits of the stored value are being used by the load. Convert
+-  // to an integer type to start with.
+-  if (SrcVal->getType()->getScalarType()->isPointerTy()) {
+-    SrcVal =
+-        Builder.CreatePtrToInt(SrcVal, DL->getIntPtrType(SrcVal->getType()));
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-
+-  if (!SrcVal->getType()->isIntegerTy()) {
+-    SrcVal =
+-        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-  // Shift the bits to the least significant depending on endianness.
+-  unsigned ShiftAmt;
+-  if (DL->isLittleEndian())
+-    ShiftAmt = Offset * 8;
+-  else
+-    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+-
+-  if (ShiftAmt) {
+-    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-  if (LoadSize != StoreSize) {
+-    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize * 8));
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-  return coerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt);
+-}
+-
+-/// GetLoadValueForLoad - This function is called when we have a
+-/// memdep query of a load that ends up being a clobbering load. This means
+-/// that the load *may* provide bits used by the load but we can't be sure
+-/// because the pointers don't mustalias. Check this case to see if there is
+-/// anything more we can do before we give up.
+-Value *NewGVN::getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+-                                   Type *LoadTy, Instruction *InsertPt) {
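The ShiftAmt selection in the deleted getStoreValueForLoad is the endianness-sensitive core. Isolated as a hypothetical byte-based helper, matching the arithmetic above:

// Which bit offset of the stored value feeds the load: little-endian counts
// from the low end of the store, big-endian from the high end.
static unsigned loadShiftAmount(unsigned StoreSizeBytes,
                                unsigned LoadSizeBytes, unsigned OffsetBytes,
                                bool LittleEndian) {
  return LittleEndian
             ? OffsetBytes * 8
             : (StoreSizeBytes - LoadSizeBytes - OffsetBytes) * 8;
}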
+-  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+-  // widen SrcVal out to a larger load.
+-  unsigned SrcValSize = DL->getTypeStoreSize(SrcVal->getType());
+-  unsigned LoadSize = DL->getTypeStoreSize(LoadTy);
+-  if (Offset + LoadSize > SrcValSize) {
+-    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+-    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+-    // If we have a load/load clobber and DepLI can be widened to cover this
+-    // load, then we should widen it to the next power of 2 size big enough!
+-    unsigned NewLoadSize = Offset + LoadSize;
+-    if (!isPowerOf2_32(NewLoadSize))
+-      NewLoadSize = NextPowerOf2(NewLoadSize);
+-
+-    Value *PtrVal = SrcVal->getPointerOperand();
+-
+-    // Insert the new load after the old load. This ensures that subsequent
+-    // memdep queries will find the new load. We can't easily remove the old
+-    // load completely because it is already in the value numbering table.
+-    IRBuilder<> Builder(SrcVal->getParent(), ++(SrcVal->getIterator()));
+-    Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+-    DestPTy =
+-        PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+-    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+-    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+-    if (Instruction *I = dyn_cast(PtrVal))
+-      handleNewInstruction(I);
+-    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+-    NewLoad->takeName(SrcVal);
+-    NewLoad->setAlignment(SrcVal->getAlignment());
+-    handleNewInstruction(NewLoad);
+-    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+-    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+-    // This ensures we forward other coercions onto the new load, instead of
+-    // the old one
+-    CoercionForwarding[SrcVal] = NewLoad;
+-
+-    // Replace uses of the original load with the wider load. On a big endian
+-    // system, we need to shift down to get the relevant bits.
+-    Value *RV = NewLoad;
+-    if (DL->isBigEndian()) {
+-      RV = Builder.CreateLShr(
+-          RV, NewLoadSize * 8 - SrcVal->getType()->getPrimitiveSizeInBits());
+-      if (Instruction *I = dyn_cast(RV))
+-        handleNewInstruction(I);
+-    }
+-
+-    RV = Builder.CreateTrunc(RV, SrcVal->getType());
+-    if (Instruction *I = dyn_cast(RV))
+-      handleNewInstruction(I);
+-
+-    // markUsersTouched(SrcVal);
+-    // assert(false && "Need to debug this");
+-    // So, we just widened a load that we will have already gone past in
+-    // elimination, so in order to get rid of the uses, we have to do it here
+-
+-    SrcVal->replaceAllUsesWith(RV);
+-
+-    // We would like to use gvn.markInstructionForDeletion here, but we can't
+-    // because the load is already memoized into the leader map table that GVN
+-    // tracks. It is potentially possible to remove the load from the table,
+-    // but then all of the operations based on it would need to be rehashed.
+-    // Just leave the dead load around.
+-    // FIXME: This is no longer a problem
+-    markInstructionForDeletion(SrcVal);
+-    SrcVal = NewLoad;
+-  }
+-
+-  return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt);
+-}
+-
+-/// GetMemInstValueForLoad - This function is called when we have a
+-/// memdep query of a load that ends up being a clobbering mem intrinsic.
+-Value *NewGVN::getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+-                                      Type *LoadTy, Instruction *InsertPt,
+-                                      bool NoNewInst) {
+-
+-  LLVMContext &Ctx = LoadTy->getContext();
+-  uint64_t LoadSize = DL->getTypeSizeInBits(LoadTy) / 8;
+-
+-  IRBuilder<> Builder(InsertPt);
+-
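The widened-load sizing above rounds Offset + LoadSize up to a power of two. The same computation as a standalone sketch (a simple loop standing in for LLVM's NextPowerOf2):

#include <cstdint>

// The replacement load must cover Offset + LoadSize bytes, rounded up to a
// power-of-two width so it remains a single legal integer load.
static uint64_t widenedLoadSize(uint64_t Offset, uint64_t LoadSize) {
  uint64_t Needed = Offset + LoadSize;
  uint64_t P = 1;
  while (P < Needed)
    P <<= 1;
  return P;
}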
+- if (MemSetInst *MSI = dyn_cast(SrcInst)) { +- // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and +- // independently of what the offset is. +- Value *Val = MSI->getValue(); +- if (LoadSize != 1) { +- if (NoNewInst) +- return nullptr; +- Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize * 8)); +- if (Instruction *I = dyn_cast(Val)) +- handleNewInstruction(I); +- } +- +- Value *OneElt = Val; +- +- // Splat the value out to the right number of bits. +- for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { +- if (NoNewInst) +- return nullptr; +- +- // If we can double the number of bytes set, do it. +- if (NumBytesSet * 2 <= LoadSize) { +- Value *ShVal = Builder.CreateShl(Val, NumBytesSet * 8); +- if (Instruction *I = dyn_cast(ShVal)) +- handleNewInstruction(I); +- Val = Builder.CreateOr(Val, ShVal); +- if (Instruction *I = dyn_cast(Val)) +- handleNewInstruction(I); +- NumBytesSet <<= 1; +- continue; +- } +- +- // Otherwise insert one byte at a time. +- Value *ShVal = Builder.CreateShl(Val, 1 * 8); +- if (Instruction *I = dyn_cast(ShVal)) +- handleNewInstruction(I); +- +- Val = Builder.CreateOr(OneElt, ShVal); +- if (Instruction *I = dyn_cast(Val)) +- handleNewInstruction(I); +- +- ++NumBytesSet; +- } +- +- return coerceAvailableValueToLoadType(Val, LoadTy, InsertPt); +- } +- +- // Otherwise, this is a memcpy/memmove from a constant global. +- MemTransferInst *MTI = cast(SrcInst); +- Constant *Src = cast(MTI->getSource()); +- unsigned AS = Src->getType()->getPointerAddressSpace(); +- +- // Otherwise, see if we can constant fold a load from the constant with the +- // offset applied as appropriate. +- Src = ConstantExpr::getBitCast( +- Src, llvm::Type::getInt8PtrTy(Src->getContext(), AS)); +- Constant *OffsetCst = +- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); +- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, +- OffsetCst); +- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); +- return ConstantFoldLoadFromConstPtr(Src, LoadTy, *DL); +-} +- +-Value *NewGVN::coerceLoad(Value *V) { +- assert(isa(V) && "Trying to coerce something other than a load"); +- LoadInst *LI = cast(V); +- // This is an offset, source pair +- const std::pair &Info = CoercionInfo.lookup(LI); +- Value *Result; +- Value *RealValue = Info.second; +- // Walk all the coercion fowarding chains, in case this load has already been +- // widened into another load +- while (true) { +- auto ForwardingResult = CoercionForwarding.find(RealValue); +- if (ForwardingResult != CoercionForwarding.end()) +- RealValue = ForwardingResult->second; +- else +- break; +- } +- +- assert(DT->dominates(cast(RealValue), LI) && +- "Trying to replace a load with one that doesn't dominate it"); +- if (StoreInst *DepSI = dyn_cast(RealValue)) +- Result = getStoreValueForLoad(DepSI->getValueOperand(), Info.first, +- LI->getType(), LI); +- else if (LoadInst *DepLI = dyn_cast(RealValue)) +- Result = getLoadValueForLoad(DepLI, Info.first, LI->getType(), LI); +- else if (MemIntrinsic *DepMI = dyn_cast(RealValue)) +- Result = getMemInstValueForLoad(DepMI, Info.first, LI->getType(), LI); +- else +- llvm_unreachable("Unknown coercion type"); +- +- assert(Result && "Should have been able to coerce"); +- DEBUG(dbgs() << "Coerced load " << *LI << " output is " << *Result << "\n"); +- return Result; +-} +- + bool NewGVN::eliminateInstructions(Function &F) { + // This is a non-standard eliminator. 
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
@@ -2960,9 +1663,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
 
     } else {
       DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
-      // If this is a singleton, with no equivalences or coercible members, we
-      // can skip it
-      if (CC->Members.size() != 1 || !CC->CoercibleMembers.empty()) {
+      // If this is a singleton, we can skip it.
+      if (CC->Members.size() != 1) {
 
         // This is a stack because equality replacement/etc may place
         // constants in the middle of the member list, and we want to use
@@ -2971,11 +1673,11 @@ bool NewGVN::eliminateInstructions(Function &F) {
 
         ValueDFSStack EliminationStack;
 
-        // Convert the members and equivalences to DFS ordered sets and
+        // Convert the members to DFS ordered sets and
         // then merge them.
         std::vector<ValueDFS> DFSOrderedSet;
-        convertDenseToDFSOrdered(CC->Members, DFSOrderedSet, false);
-        convertDenseToDFSOrdered(CC->CoercibleMembers, DFSOrderedSet, true);
+        convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
+
         // Sort the whole thing
         sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
 
@@ -2984,7 +1686,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
           int MemberDFSOut = C.DFSOut;
           Value *Member = C.Val;
           Use *MemberUse = C.U;
-          bool Coercible = C.Coercible;
 
           // We ignore void things because we can't get a value from
           // them.
@@ -3027,8 +1728,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
           // Push if we need to
           ShouldPush |= Member && EliminationStack.empty();
           if (ShouldPush) {
-            if (Coercible)
-              Member = coerceLoad(Member);
             EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
           }
         }
@@ -3084,630 +1783,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
       MembersLeft.insert(Member);
     }
     CC->Members.swap(MembersLeft);
-    CC->CoercibleMembers.clear();
   }
 
   return AnythingReplaced;
 }
 
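The eliminator these hunks touch relies on one property: a definition A dominates B exactly when A's DFS interval on the dominator tree encloses B's. Sorting a congruence class by (DFSIn, DFSOut) and sweeping with a stack therefore finds each member's nearest dominating leader in one pass. A minimal sketch of that core, with a hypothetical Member type standing in for ValueDFS:

#include <algorithm>
#include <tuple>
#include <vector>

struct Member {
  int DFSIn, DFSOut; // dominator-tree DFS interval of the definition
  int Id;            // stands in for the Value
};

// For each member (in sorted order), the Id it should be replaced with;
// a member that starts a new dominator scope leads itself.
std::vector<int> pickLeaders(std::vector<Member> Members) {
  std::sort(Members.begin(), Members.end(),
            [](const Member &A, const Member &B) {
              return std::tie(A.DFSIn, A.DFSOut) <
                     std::tie(B.DFSIn, B.DFSOut);
            });
  std::vector<Member> Stack;
  std::vector<int> Leaders;
  for (const Member &M : Members) {
    // Pop members whose interval does not enclose M; they cannot dominate it.
    while (!Stack.empty() && !(Stack.back().DFSIn <= M.DFSIn &&
                               M.DFSOut <= Stack.back().DFSOut))
      Stack.pop_back();
    if (Stack.empty())
      Stack.push_back(M); // M opens a new scope and leads it
    Leaders.push_back(Stack.back().Id);
  }
  return Leaders;
}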
non-load!"); +- if (isSimpleValue()) +- return getSimpleValue(); +- else +- return UndefValue::get(I->getType()); +- } +- +- Value *Res; +- Type *LoadTy = I->getType(); +- +- if (isSimpleValue()) { +- Res = getSimpleValue(); +- if (Res->getType() != LoadTy) { +- Res = gvn.getStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator()); +- +- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " +- << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); +- } +- } else if (isCoercedLoadValue()) { +- LoadInst *Load = getCoercedLoadValue(); +- if (Load->getType() == LoadTy && Offset == 0) { +- Res = Load; +- } else { +- Res = gvn.getLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator()); +- +- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " +- << *getCoercedLoadValue() << '\n' << *Res << '\n' +- << "\n\n\n"); +- } +- } else if (isMemIntrinValue()) { +- Res = gvn.getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, +- BB->getTerminator()); +- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset +- << " " << *getMemIntrinValue() << '\n' << *Res << '\n' +- << "\n\n\n"); +- } else { +- assert(isUndefValue() && "Should be UndefVal"); +- DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); +- return UndefValue::get(LoadTy); +- } +- return Res; +-} +-#endif +-Value *NewGVN::constructSSAForSet( +- Instruction *I, SmallVectorImpl &ValuesPerBlock) { +- // Check for the fully redundant, dominating load case. In this case, we can +- // just use the dominating value directly. +- if (ValuesPerBlock.size() == 1 && +- DT->properlyDominates(ValuesPerBlock[0].BB, I->getParent())) { +- assert(!ValuesPerBlock[0].AV.isUndefValue() && +- "Dead BB dominate this block"); +- return ValuesPerBlock[0].MaterializeAdjustedValue(cast(I), *this); +- } +- // Otherwise, we have to construct SSA form. +- SmallVector NewPHIs; +- SSAUpdater SSAUpdate(&NewPHIs); +- SSAUpdate.Initialize(I->getType(), I->getName()); +- +- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { +- const AvailableValueInBlock &AV = ValuesPerBlock[i]; +- BasicBlock *BB = AV.BB; +- +- if (SSAUpdate.HasValueForBlock(BB)) +- continue; +- +- SSAUpdate.AddAvailableValue( +- BB, AV.MaterializeAdjustedValue(cast(I), *this)); +- } +- +- // Perform PHI construction. +- Value *V = SSAUpdate.GetValueInMiddleOfBlock(I->getParent()); +- +- return V; +-} +- +-/// Split the critical edge connecting the given two blocks, and return +-/// the block inserted to the critical edge. 
-/// Split the critical edge connecting the given two blocks, and return
-/// the block inserted to the critical edge.
-BasicBlock *NewGVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
-  BasicBlock *BB =
-      SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
-  if (MD)
-    MD->invalidateCachedPredecessors();
-  return BB;
-}
-
-void NewGVN::valueNumberNewInstructionToValue(Value *New, Value *Old) {
-  if (!ValueToClass.count(New))
-    ValueToClass[New] = InitialClass;
-  const Expression *NewExpr = createVariableExpression(Old, false);
-  performCongruenceFinding(New, NewExpr);
-}
-
-void NewGVN::valueNumberNewInstruction(Value *V) {
-  if (!ValueToClass.count(V))
-    ValueToClass[V] = InitialClass;
-  const Expression *NewExpr =
-      performSymbolicEvaluation(V, cast<Instruction>(V)->getParent());
-  performCongruenceFinding(V, NewExpr);
-}
-
-Value *NewGVN::regenerateExpression(const Expression *E, BasicBlock *BB) {
-#if FIXME
-  Value *V = findPRELeader(E, BB, nullptr);
-  if (V)
-    return V;
-  if (const LoadExpression *LE = dyn_cast<LoadExpression>(E)) {
-    LoadInst *LI = new LoadInst(LE->getOperand(0), "loadpre", false,
-                                LE->getAlignment(), BB->getTerminator());
-    const_cast<LoadExpression *>(LE)->setLoadInst(LI);
-    MSSA->addNewMemoryUse(LI, MemorySSA::InsertionPlace::End);
-
-    return LI;
-  } else if (const BasicExpression *BE = dyn_cast<BasicExpression>(E)) {
-    unsigned Opcode = BE->getOpcode();
-    if (Instruction::isBinaryOp(Opcode)) {
-      BinaryOperator *BO = BinaryOperator::Create(
-          Instruction::BinaryOps(Opcode), BE->getOperand(0), BE->getOperand(1),
-          "binaryoppre", BB->getTerminator());
-      // FIXME: Track NSW/NUW
-      return BO;
-    } else if (Opcode == Instruction::MemoryOps::GetElementPtr) {
-#if FIXME
-      // It wants the pointee type, which is complex, and not equivalent to
-      // getType on the original GEP
-      // FIXME: Store getSrcType on the GEP
-      Type *PointeeType =
-          cast<PointerType>(BE->getOperand(0)->getType()->getScalarType())
-              ->getElementType();
-#endif
-      Value *GEP = nullptr;
-      if (Value *V = SimplifyGEPInst(
-              BE->getType(), makeArrayRef(BE->ops_begin(), BE->ops_end()), *DL,
-              TLI, DT, AC))
-        GEP = V;
-      else
-        GEP = GetElementPtrInst::Create(
-            BE->getType(), BE->getOperand(0),
-            makeArrayRef(BE->ops_begin(), BE->ops_end()).slice(1), "geppre",
-            BB->getTerminator());
-      // FIXME: Track inbounds
-      return GEP;
-    } else if (((Opcode & 0xff00) >> 8) == Instruction::ICmp) {
-      CmpInst::Predicate Pred = (CmpInst::Predicate)(Opcode & 0xff);
-      ICmpInst *Cmp = new ICmpInst(BB->getTerminator(), Pred, BE->getOperand(0),
-                                   BE->getOperand(1), "icmppre");
-      return Cmp;
-    } else if (((Opcode & 0xff00) >> 8) == Instruction::FCmp) {
-      CmpInst::Predicate Pred = (CmpInst::Predicate)(Opcode & 0xff);
-      FCmpInst *Cmp = new FCmpInst(BB->getTerminator(), Pred, BE->getOperand(0),
-                                   BE->getOperand(1), "fcmppre");
-      return Cmp;
-    } else
-      llvm_unreachable("What!");
-  }
-  llvm_unreachable("What!");
-#endif
-  return nullptr;
-}
-
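The (Opcode & 0xff00) >> 8 tests in regenerateExpression undo NewGVN's packing of comparisons, which stores the real opcode in the high byte of an expression's opcode field and the compare predicate in the low byte. In isolation, as a sketch:

// Pack/unpack scheme used for compare expressions, e.g.
// packCmpOpcode(Instruction::ICmp, CmpInst::ICMP_EQ). The low byte is wide
// enough for every CmpInst::Predicate value.
unsigned packCmpOpcode(unsigned Opcode, unsigned Predicate) {
  return (Opcode << 8) | Predicate;
}
unsigned unpackOpcode(unsigned Packed) { return (Packed & 0xff00) >> 8; }
unsigned unpackPredicate(unsigned Packed) { return Packed & 0xff; }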
-bool NewGVN::performPRE(Instruction *I, AvailValInBlkVect &ValuesPerBlock,
-                        UnavailBlkVect &UnavailableBlocks) {
-  bool Changed = false;
-  // Okay, we have *some* definitions of the value. This means that the value
-  // is available in some of our (transitive) predecessors. Let's think about
-  // doing PRE of this instruction. This will involve inserting a new
-  // instruction into the predecessor when it's not available. We could do
-  // this in general, but prefer to not increase code size. As such, we only
-  // do this when we know that we only have to insert *one* load (which means
-  // we're basically moving the load, not inserting a new one).
-
-  // Decide whether PRE is profitable for this load.
-  unsigned NumUnavailablePreds = UnavailableBlocks.size();
-  assert(NumUnavailablePreds != 0 &&
-         "Fully available value should already be eliminated!");
-
-  // If this load is unavailable in multiple predecessors, reject it.
-  // FIXME: If we could restructure the CFG, we could make a common pred with
-  // all the preds that don't have an available LI and insert a new load into
-  // that one block.
-  if (NumUnavailablePreds > 2)
-    return false;
-
-  // Check if the load can safely be moved to all the unavailable predecessors.
-  bool CanDoPRE = true;
-
-  SmallVector<Instruction *, 8> NewInsts;
-  // FIXME: Insert Can Be Avail stuff here
-  if (!CanDoPRE) {
-    while (!NewInsts.empty()) {
-      Instruction *I = NewInsts.pop_back_val();
-      if (MD)
-        MD->removeInstruction(I);
-      I->eraseFromParent();
-    }
-    // HINT: Don't revert the edge-splitting, as the following transformation
-    // may also need to split these critical edges.
-    return Changed;
-  }
-
-  // Okay, we can eliminate this load by inserting a reload in the predecessor
-  // and using PHI construction to get the value in the other predecessors, do
-  // it.
-  DEBUG(dbgs() << "GVN REMOVING PRE INSTRUCTION: " << *I << '\n');
-  DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
-                                      << " INSTS: " << *NewInsts.back()
-                                      << '\n');
-
-  // Assign value numbers to the new instructions.
-  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
-    valueNumberNewInstruction(NewInsts[i]);
-  }
-
-  for (auto &UnavailInfo : UnavailableBlocks)
-    if (!UnavailInfo.second)
-      return false;
-
-  for (auto &UnavailInfo : UnavailableBlocks) {
-    BasicBlock *UnavailablePred = UnavailInfo.first;
-
-    Value *NewVal = regenerateExpression(UnavailInfo.second, UnavailablePred);
-    valueNumberNewInstruction(NewVal);
-    if (Instruction *NewInst = dyn_cast<Instruction>(NewVal)) {
-      NewInst->setDebugLoc(I->getDebugLoc());
-      LoadInst *LI = dyn_cast<LoadInst>(I);
-      LoadInst *NewLoadInst = dyn_cast<LoadInst>(NewInst);
-      // Transfer load tags.
-      if (LI && NewLoadInst) {
-        AAMDNodes Tags;
-        LI->getAAMetadata(Tags);
-        if (Tags)
-          NewLoadInst->setAAMetadata(Tags);
-        if (MD)
-          MD->invalidateCachedPointerInfo(NewLoadInst->getPointerOperand());
-      }
-    }
-
-    // Add the newly created load.
-    ValuesPerBlock.push_back(
-        AvailableValueInBlock::get(UnavailablePred, NewVal));
-    DEBUG(dbgs() << "GVN INSERTED " << *NewVal << '\n');
-  }
-
-  // Perform PHI construction.
-  Value *V = constructSSAForSet(I, ValuesPerBlock);
-  I->replaceAllUsesWith(V);
-  PREValueForwarding[I] = V;
-  // The value of V is the same as the value of the old instruction (or it
-  // would not be redundant).
-  valueNumberNewInstructionToValue(V, I);
-
-  ValueToExpression[I] = ValueToExpression[V];
-  // I no longer exists.
-  ValueToClass.lookup(I)->Members.erase(I);
-  if (isa<PHINode>(V))
-    V->takeName(I);
-  if (MD && V->getType()->getScalarType()->isPointerTy())
-    MD->invalidateCachedPointerInfo(V);
-  markInstructionForDeletion(I);
-  ++NumPRELoad;
-  return true;
-}
-
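The profitability gate in the deleted performPRE reduces to a small predicate: zero unavailable predecessors means the value was fully redundant and belongs to the full-redundancy path, while more than two means the transform would start duplicating code rather than moving it. As a sketch (helper name is hypothetical):

bool isPREProfitable(unsigned NumUnavailablePreds) {
  // Fully available values are handled by the full-redundancy elimination,
  // so PRE should never see zero unavailable predecessors.
  if (NumUnavailablePreds == 0)
    return false;
  // Inserting a copy into more than two predecessors grows code size.
  return NumUnavailablePreds <= 2;
}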
-void NewGVN::analyzeAvailability(Instruction *I,
-                                 AvailValInBlkVect &ValuesPerBlock,
-                                 UnavailBlkVect &UnavailableBlocks) {
-  for (auto P : PredCache.get(I->getParent())) {
-    if (!ReachableBlocks.count(P)) {
-      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(P));
-      continue;
-    }
-    const Expression *Before = ValueToExpression.lookup(I);
-    // This can happen if the expression was too complex for GVN to analyze.
-    if (!Before)
-      return;
-    const Expression *E = phiTranslateExpression(Before, I->getParent(), P, I);
-    Value *V = nullptr;
-    if (E) {
-      E = trySimplifyPREExpression(E, P);
-      DEBUG(dbgs() << "After simplification, expression is " << *E << "\n");
-
-      V = findPRELeader(E, P, I);
-      // If we got a store, we can rip out the source value.
-      // Otherwise, void type stuff is not acceptable.
-      if (V && V->getType()->isVoidTy()) {
-        if (StoreInst *SI = dyn_cast<StoreInst>(V))
-          V = SI->getValueOperand();
-        else
-          V = nullptr;
-      }
-    }
-
-    if (!V)
-      UnavailableBlocks.push_back({P, E});
-    else
-      ValuesPerBlock.push_back(AvailableValueInBlock::get(P, V));
-  }
-}
-
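analyzeAvailability fills two vectors whose sizes, compared against the predecessor count, classify the candidate, and performPREOnClass below dispatches on that classification. As a standalone decision function (assumed counts, plain C++, not LLVM API):

enum class Redundancy { None, Partial, Full };

Redundancy classifyCandidate(unsigned NumPreds, unsigned NumAvailable,
                             unsigned NumUnavailable) {
  // Some predecessor could not be analyzed at all (phi translation yielded
  // neither an available value nor an unavailable-block entry).
  if (NumAvailable + NumUnavailable != NumPreds)
    return Redundancy::None;
  // No predecessor produces the value; there is nothing to merge.
  if (NumAvailable == 0)
    return Redundancy::None;
  return NumUnavailable == 0 ? Redundancy::Full : Redundancy::Partial;
}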
-bool NewGVN::performPREOnClass(CongruenceClass *CC) {
-
-  // FIXME: Only do some
-  bool Changed = false;
-  AvailValInBlkVect ValuesPerBlock;
-
-  for (auto M : CC->Members) {
-    if (isa(M) || M->getType()->isVoidTy())
-      continue;
-    if (!isa(M) && !isa(M))
-      continue;
-
-    if (Instruction *I = dyn_cast<Instruction>(M)) {
-      UnavailBlkVect UnavailableBlocks;
-      BasicBlock *IBlock = I->getParent();
-      if (PredCache.size(IBlock) == 0)
-        continue;
-      analyzeAvailability(I, ValuesPerBlock, UnavailableBlocks);
-
-      if ((UnavailableBlocks.size() + ValuesPerBlock.size()) !=
-          PredCache.size(IBlock))
-        continue;
-#if 1
-      // If we have no predecessors that produce a known value for this load,
-      // exit early.
-      if (ValuesPerBlock.empty())
-        continue;
-#endif
-      // Step 3: Eliminate full redundancy.
-      //
-      // If all of the instructions we depend on produce a known value for
-      // this load, then it is fully redundant and we can use PHI insertion
-      // to compute its value. Insert PHIs and remove the fully redundant
-      // value now.
-      if (UnavailableBlocks.empty()) {
-        DEBUG(dbgs() << "GVN REMOVING INSTRUCTION: " << *I << '\n');
-
-        // Perform PHI construction.
-        Value *V = constructSSAForSet(I, ValuesPerBlock);
-        I->replaceAllUsesWith(V);
-        PREValueForwarding[I] = V;
-        valueNumberNewInstruction(V);
-        ValueToClass.lookup(I)->Members.erase(I);
-        ValueToExpression[I] = ValueToExpression[V];
-        if (isa<PHINode>(V))
-          V->takeName(I);
-        if (MD && V->getType()->getScalarType()->isPointerTy())
-          MD->invalidateCachedPointerInfo(V);
-        markInstructionForDeletion(I);
-        ++NumNewGVNPRE;
-        Changed = true;
-        continue;
-      }
-      // Step 4: Eliminate partial redundancy.
-      if (!EnablePRE || !EnableLoadPRE)
-        continue;
-      Changed |= performPRE(I, ValuesPerBlock, UnavailableBlocks);
-    }
-  }
-  return Changed;
-}
-
-// Find a leader for Op in BB.
-Value *NewGVN::findPRELeader(Value *Op, const BasicBlock *BB,
-                             const Value *MustDominate) {
-  if (alwaysAvailable(Op))
-    return Op;
-
-  CongruenceClass *CC = ValueToClass[Op];
-  if (!CC || CC == InitialClass)
-    return nullptr;
-
-  if (CC->RepLeader && alwaysAvailable(CC->RepLeader))
-    return CC->RepLeader;
-  for (auto M : CC->Members) {
-    if (M == MustDominate)
-      continue;
-    if (Instruction *I = dyn_cast<Instruction>(M))
-      if (DT->dominates(I->getParent(), BB))
-        return I;
-  }
-  return nullptr;
-}
-
-// Find a leader for E in BB.
-Value *NewGVN::findPRELeader(const Expression *E, const BasicBlock *BB,
-                             const Value *MustDominate) {
-  if (const ConstantExpression *CE = dyn_cast<ConstantExpression>(E))
-    return CE->getConstantValue();
-  else if (const VariableExpression *VE = dyn_cast<VariableExpression>(E))
-    return findPRELeader(VE->getVariableValue(), BB, MustDominate);
-
-  DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
-
-  const auto Result = ExpressionToClass.find(E);
-  if (Result == ExpressionToClass.end())
-    return nullptr;
-
-  CongruenceClass *CC = Result->second;
-
-  if (!CC || CC == InitialClass)
-    return nullptr;
-
-  if (CC->RepLeader &&
-      (isa(CC->RepLeader) || isa(CC->RepLeader) ||
-       isa(CC->RepLeader)))
-    return CC->RepLeader;
-
-  for (auto M : CC->Members) {
-    if (M == MustDominate)
-      continue;
-    if (Instruction *I = dyn_cast<Instruction>(M))
-      if (DT->dominates(I->getParent(), BB))
-        return I;
-  }
-  return nullptr;
-}
-
-MemoryAccess *NewGVN::phiTranslateMemoryAccess(MemoryAccess *MA,
-                                               const BasicBlock *Pred) {
-  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
-    for (const auto &A : MP->operands()) {
-      if (MP->getIncomingBlock(A) == Pred) {
-        return cast<MemoryAccess>(A);
-      }
-    }
-    // We should have found something.
-    return nullptr;
-  }
-  return MA;
-}
-
-static Value *phiTranslateValue(Value *Incoming, const BasicBlock *Pred) {
-  // Back translate if defined by a phi in this block.
-  PHINode *P = dyn_cast<PHINode>(Incoming);
-  int Index = P->getBasicBlockIndex(Pred);
-  if (Index != -1) {
-    return P->getIncomingValue(Index);
-  }
-  // Not defined by a phi in this block.
-  return Incoming;
-}
-
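phiTranslateMemoryAccess above is the MemorySSA half of phi translation: moving a query from a block to one predecessor replaces a MemoryPhi with its incoming access for that edge, and anything else is unaffected by the choice of predecessor. The core of it, as a sketch using index-based MemoryPhi accessors (the MemorySSA header lived under Transforms/Utils at the time of this patch):

#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

MemoryAccess *translateAcrossEdge(MemoryAccess *MA, const BasicBlock *Pred) {
  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
    for (unsigned I = 0, E = MP->getNumIncomingValues(); I != E; ++I)
      if (MP->getIncomingBlock(I) == Pred)
        return MP->getIncomingValue(I);
    return nullptr; // Pred is not actually an incoming edge.
  }
  return MA; // Defs and uses pass through unchanged.
}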
-bool NewGVN::phiTranslateArguments(const BasicExpression *From,
-                                   BasicExpression *To, const BasicBlock *Pred,
-                                   const Value *MustDominate) {
-  for (unsigned i = 0, e = From->getNumOperands(); i != e; ++i) {
-    Value *Arg = From->getOperand(i);
-    Value *Forwarded = PREValueForwarding.lookup(Arg);
-    if (Forwarded)
-      Arg = Forwarded;
-    // Fold in immediate adds.
-    bool processedAdd = false;
-    if (Instruction *I = dyn_cast<Instruction>(Arg)) {
-      if (I->getOpcode() == Instruction::Add &&
-          isa<PHINode>(I->getOperand(0)) &&
-          isa<ConstantInt>(I->getOperand(1))) {
-
-        Constant *RHS = cast<Constant>(I->getOperand(1));
-        bool isNSW = cast<BinaryOperator>(I)->hasNoSignedWrap();
-        bool isNUW = cast<BinaryOperator>(I)->hasNoUnsignedWrap();
-        Value *LHS = phiTranslateValue(I->getOperand(0), Pred);
-        // If the PHI translated LHS is an add of a constant, fold the
-        // immediates.
-        if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
-          if (BOp->getOpcode() == Instruction::Add)
-            if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-              LHS = BOp->getOperand(0);
-              RHS = ConstantExpr::getAdd(RHS, CI);
-              isNSW = isNUW = false;
-            }
-        // FIXME: Handle NSW.
-        const Expression *BinExpr = createBinaryExpression(
-            I->getOpcode(), I->getType(), LHS, RHS, Pred);
-        Value *Leader = findPRELeader(BinExpr, Pred, MustDominate);
-        if (Leader) {
-          Arg = Leader;
-          processedAdd = true;
-        }
-      }
-    }
-    // If Arg is still a phi node, and wasn't processed by the add processing
-    // above, translate it now.
-    if (!processedAdd && isa<PHINode>(Arg))
-      Arg = phiTranslateValue(Arg, Pred);
-
-    Value *Leader = findPRELeader(Arg, Pred, MustDominate);
-    if (Leader == nullptr)
-      return false;
-    To->setOperand(i, Leader);
-  }
-  return true;
-}
-
-const Expression *NewGVN::phiTranslateExpression(const Expression *E,
-                                                 BasicBlock *Curr,
-                                                 BasicBlock *Pred,
-                                                 const Value *MustDominate) {
-  const Expression *ResultExpr = nullptr;
-  if (const LoadExpression *LE = dyn_cast<LoadExpression>(E)) {
-    MemoryAccess *MA = phiTranslateMemoryAccess(LE->getDefiningAccess(), Pred);
-    if (!MA)
-      return nullptr;
-    LoadExpression *NLE = createLoadExpression(LE->getType(), LE->getOperand(0),
-                                               LE->getLoadInst(), MA, Pred);
-    if (!phiTranslateArguments(LE, NLE, Pred, MustDominate))
-      return nullptr;
-    LoadInst *LI = LE->getLoadInst();
-    MemoryLocation Loc;
-    if (LI) {
-      Loc = MemoryLocation::get(LI);
-      Loc = Loc.getWithNewPtr(NLE->getOperand(0));
-    } else {
-      Loc.Ptr = NLE->getOperand(0);
-    }
-    MA = MSSAWalker->getClobberingMemoryAccess(MA, Loc);
-    NLE->setDefiningAccess(MA);
-    ResultExpr = NLE;
-  } else if (const BasicExpression *BE = dyn_cast<BasicExpression>(E)) {
-    BasicExpression *NBE =
-        new (ExpressionAllocator) BasicExpression(BE->getNumOperands());
-    NBE->setType(BE->getType());
-    NBE->setOpcode(BE->getOpcode());
-    NBE->allocateOperands(ArgRecycler, ExpressionAllocator);
-    for (unsigned i = 0, e = BE->getNumOperands(); i != e; ++i)
-      NBE->ops_push_back(nullptr);
-    if (!phiTranslateArguments(BE, NBE, Pred, MustDominate))
-      return nullptr;
-    ResultExpr = NBE;
-  }
-
-  return ResultExpr;
-}
-
-const Expression *NewGVN::trySimplifyPREExpression(const Expression *E,
-                                                   const BasicBlock *B) {
-  const Expression *ResultExpr = E;
-  // This must come first, because LoadExpressions are BasicExpressions.
-  if (const LoadExpression *LE = dyn_cast<LoadExpression>(E)) {
-    MemoryDef *MD = dyn_cast<MemoryDef>(LE->getDefiningAccess());
-    if (MD && !MSSA->isLiveOnEntryDef(MD)) {
-      const Expression *Temp = performSymbolicLoadCoercion(
-          LE->getType(), LE->getOperand(0), LE->getLoadInst(),
-          MD->getMemoryInst(), MD, B);
-      if (Temp)
-        ResultExpr = Temp;
-    }
-  } else if (const BasicExpression *BE = dyn_cast<BasicExpression>(E)) {
-    unsigned Opcode = BE->getOpcode();
-    if (Instruction::isBinaryOp(Opcode)) {
-      Value *V = SimplifyBinOp(Opcode, BE->getOperand(0), BE->getOperand(1),
-                               *DL, TLI, DT, AC);
-      if (V) {
-        if (Constant *C = dyn_cast<Constant>(V))
-          ResultExpr = createConstantExpression(C, false);
-        else if (alwaysAvailable(V))
-          ResultExpr = createVariableExpression(V, false);
-      }
-    } else if (Opcode == Instruction::GetElementPtr) {
-      Value *V = SimplifyGEPInst(BE->getType(),
-                                 makeArrayRef(BE->ops_begin(), BE->ops_end()),
-                                 *DL, TLI, DT, AC);
-      if (V) {
-        if (Constant *C = dyn_cast<Constant>(V))
-          ResultExpr = createConstantExpression(C, false);
-        else if (alwaysAvailable(V))
-          ResultExpr = createVariableExpression(V, false);
-      }
-    }
-  }
-
-  return ResultExpr;
-}
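The immediate-add folding in phiTranslateArguments above is the usual PHITransAddr-style rewrite: when translating "LHS + C1" turns LHS into "X + C2", the expression becomes "X + (C1 + C2)" and the wrap flags must be dropped. On plain integers, as a sketch (the struct is an illustrative stand-in for the IR, not part of the patch):

struct TranslatedAdd {
  int Base;      // identifies the non-constant operand after translation
  long long Imm; // folded immediate
  bool HasNSW, HasNUW;
};

TranslatedAdd foldImmediateAdd(TranslatedAdd LHS, long long C1) {
  // (Base + Imm) + C1 ==> Base + (Imm + C1). Reassociation can move where
  // overflow happens, so the wrap flags no longer hold.
  return {LHS.Base, LHS.Imm + C1, false, false};
}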
Index: test/Transforms/NewGVN/completeness.ll
===================================================================
--- /dev/null
+++ test/Transforms/NewGVN/completeness.ll
@@ -0,0 +1,389 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test1(i32, i8**) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:         br label [[TMP6:%.*]]
+; CHECK:         br label [[TMP6]]
+; CHECK:         [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT:    ret i32 [[TMP7]]
+;
+  %3 = icmp ne i32 %0, 0
+  br i1 %3, label %4, label %5
+
+;