diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -70,13 +70,16 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -110,6 +113,7 @@ #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/VNCoercion.h" #include #include @@ -496,11 +500,13 @@ class NewGVN { Function &F; DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; const TargetLibraryInfo *TLI = nullptr; AliasAnalysis *AA = nullptr; MemorySSA *MSSA = nullptr; MemorySSAWalker *MSSAWalker = nullptr; AssumptionCache *AC = nullptr; + ImplicitControlFlowTracking *ICF = nullptr; const DataLayout &DL; std::unique_ptr PredInfo; @@ -546,6 +552,11 @@ // created that they are known equivalent to. DenseMap RealToTemp; + // We do not want to apply load coercion in new instructions that are + // generated during phi-of-ops optimization. We use this value to bail out in + // these cases. + Value *CurrentPhiOfOpsInsn = nullptr; + // In order to know when we should re-process instructions that have // phi-of-ops, we track the set of expressions that they needed as // leaders. 
When we discover new leaders for those expressions, we process the @@ -669,10 +680,10 @@ SmallVector NewLoadsInLoadCoercion; public: - NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, - const DataLayout &DL) - : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL), + NewGVN(Function &F, DominatorTree *DT, PostDominatorTree *PDT, + AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, + MemorySSA *MSSA, const DataLayout &DL) + : F(F), DT(DT), PDT(PDT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL), PredInfo(std::make_unique(F, *DT, *AC)), SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false, /*CanUseUndef=*/false) {} @@ -935,6 +946,16 @@ // Extract the value that will replace the load from the depending // instruction. Value *getExtractedValue(LoadInst *, Instruction *); + // Emit the phi that replaces the load and it updates the SSA with the new + // phi. + Value *emitLoadCoercionPhi( + LoadInst *, BasicBlock *, + SmallVectorImpl> &); + // Check if the load can be replaced by a phi. + Value *tryReplaceLoadWithPhi( + LoadInst *, BasicBlock *, + SmallVectorImpl> &, + SmallVectorImpl &); }; } // end anonymous namespace @@ -1163,6 +1184,8 @@ CongruenceClass *CC = ValueToClass.lookup(V); if (CC) { + if (EnableLoadCoercion && LoadCoercion.count(CC->getLeader())) + return ExprResult::none(); if (CC->getLeader() && CC->getLeader() != I) { return ExprResult::some(createVariableOrConstant(CC->getLeader()), V); } @@ -1577,7 +1600,97 @@ if (It != NewLoadsInLoadCoercion.end()) return nullptr; - if (auto *DepSI = dyn_cast(DepInst)) { + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + // If the candidate load is dominated by a call that never returns, then we + // do not replace the load with a phi node because this will break the + // semantics of the code. 
+ if (ICF->isDominatedByICFIFromSameBlock(LI)) + return nullptr; + + // The MemoryPhi of Example 1 indicates that the load is dependent on the + // store (1) in Basic block T and store (2) in basic block F. Therefore, + // both of the store instructions should be added in LoadCoercion map. + // + // Example 1: + // BB1: BB2: + // 1 = MemoryDef(liveOnEntry) 2 = MemoryDef(liveOnEntry) + // store i32 100, i32* %P store i32 500, i32* %P + // \ / + // BB3: + // 3 = MemoryPhi({T,1},{F,2}) + // %V = load i32, i32* %P + // + // In Example 2, the load of BB3 has two dependencies: i. the store in BB1 + // as the MemoryPhi indicates and the load in BB2 which is not included in + // MemoryPhi. To find this dependency, we have to find all the uses that are + // live on Entry and check if any of them can optimize out the current load. + // + // Example 2: + // BB1: BB2: + // 1 = MemoryDef(liveOnEntry) 0 = MemoryDef(liveOnEntry) + // store i32 100, i32* %P %V1 = load i32, i32* %P + // \ / + // BB3: + // 2 = MemoryPhi({T,1},{F,liveOnEntry}) + // %V2 = load i32, i32* %P + // + + // Iterate over all the operands of the memory phi and check if any of its + // operands can optimize out the current load. + for (Use &Op : MemPhi->incoming_values()) { + // Bail out if one of the operands is not a memory use or definition. + if (!isa(&Op)) + return nullptr; + + MemoryUseOrDef *MemAccess = cast(&Op); + assert(MemAccess && "Memory definition is exepcted.\n"); + int Offset = -1; + Instruction *DepI = nullptr; + + // If any of the operands of the MemoryPhi is live on entry (Example 2), + // then we have to check if there is a load instruction that can optimize + // out the current load instruction. 
+ if (MSSA->isLiveOnEntryDef(MemAccess)) { + MemoryAccess *MemAccess = MSSA->getLiveOnEntryDef(); + for (const auto &U : MemAccess->uses()) { + Offset = -1; + auto *MemUse = dyn_cast(U.getUser()); + if (MemUse == nullptr) + continue; + DepI = MemUse->getMemoryInst(); + if (!isa(DepI) || DepI == LI || DepI->getNumUses() == 0) + continue; + // Check if DepI is in the current incoming block or it is in a + // predecessor that dominates the incoming block. + BasicBlock *DepIBB = DepI->getParent(); + BasicBlock *IncomingBB = MemPhi->getIncomingBlock(Op); + if (IncomingBB == DepIBB || DT->dominates(DepIBB, IncomingBB)) + Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, + cast(DepI), DL); + if (Offset >= 0) + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepI); + } + } else { + // Check if the MemoryPhi operand can optimize out the current load. + DepI = MemAccess->getMemoryInst(); + if (DT->dominates(LI, DepI)) { + // In this case, there is a loop. For now, we bail-out load coercion. + const_cast(this)->LoadCoercion.erase(LI); + return nullptr; + } else if (StoreInst *S = dyn_cast(DepI)) + Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, S, DL); + else if (LoadInst *L = dyn_cast(DepI)) { + Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, L, DL); + } else { + const_cast(this)->LoadCoercion.erase(LI); + return nullptr; + } + if (Offset >= 0) + tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepI); + } + } + return nullptr; + } else if (auto *DepSI = dyn_cast(DepInst)) { // Can't forward from non-atomic to atomic without violating memory model. // Also don't need to coerce if they are the same type, we will just // propagate. @@ -1618,8 +1731,8 @@ return createConstantExpression(PossibleConstant); } } else if (EnableLoadCoercion) { - // Similarly, we do not create a load expression for the loads that are - // elimianted with load coercion. + // Similarly, we do not create a load expression for the loads that + // are eliminated with load coercion. 
tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepInst); return nullptr; } @@ -1689,8 +1802,18 @@ MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(OriginalAccess); + // If the load is generated during phi-of-ops optimization, then we do not + // apply load coercion. + if (LI == CurrentPhiOfOpsInsn) + return createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess, + DefiningAccess); + // Check if we can apply load coercion. - if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + if (EnableLoadCoercion) + performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), LI, + nullptr, DefiningAccess); + } else if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast(DefiningAccess)) { Instruction *DefiningInst = MD->getMemoryInst(); // If the defining instruction is not reachable, replace with poison. @@ -1705,7 +1828,7 @@ DefiningInst, DefiningAccess)) return CoercionResult; } - } else if (EnableLoadCoercion) { + } else if (EnableLoadCoercion && !ICF->isDominatedByICFIFromSameBlock(LI)) { // Check if any of the live-in loads can be eliminated with load coercion. for (const auto &U : DefiningAccess->uses()) if (auto *MemUse = dyn_cast(U.getUser())) { @@ -1733,7 +1856,11 @@ continue; // The two loads should be executed in the right order. - if (DependingLoadDomintatesLI) + // TODO: Add analysis that explores more possibilities. 
+ if (DependingLoadDomintatesLI || + (ReachableEdges.count( + {DependingLoad->getParent(), LI->getParent()}) && + PDT->dominates(LI, DependingLoad))) performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), LI, DependingLoad, getMemoryAccess(DependingLoad)); @@ -1776,6 +1903,9 @@ AdditionallyUsedValue = CmpOp1; } + if (EnableLoadCoercion && LoadCoercion.count(FirstOp)) + return ExprResult::none(); + if (Predicate == CmpInst::ICMP_EQ) return ExprResult::some(createVariableOrConstant(FirstOp), AdditionallyUsedValue, PI); @@ -2955,6 +3085,7 @@ // Clone the instruction, create an expression from it that is // translated back into the predecessor, and see if we have a leader. Instruction *ValueOp = I->clone(); + CurrentPhiOfOpsInsn = ValueOp; if (MemAccess) TempToMemory.insert({ValueOp, MemAccess}); bool SafeForPHIOfOps = true; @@ -3308,10 +3439,12 @@ // If we created a phi of ops, use it. // If we couldn't create one, make sure we don't leave one lying around if (PHIE) { + LoadCoercion.erase(I); Symbolized = PHIE; } else if (auto *Op = RealToTemp.lookup(I)) { removePhiOfOps(I, Op); } + CurrentPhiOfOpsInsn = nullptr; } } else { // Mark the instruction as unused so we don't value number it again. @@ -3627,6 +3760,8 @@ NumFuncArgs = F.arg_size(); MSSAWalker = MSSA->getWalker(); SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); + ImplicitControlFlowTracking ImplicitCFT; + ICF = &ImplicitCFT; // Count number of instructions for sizing of hash tables, and come // up with a global dfs numbering for instructions. @@ -4095,6 +4230,147 @@ return NewValue; } +// Create the phi node that replaces the load in load coercion. 
+Value *NewGVN::emitLoadCoercionPhi( + LoadInst *LI, BasicBlock *InsertBB, + SmallVectorImpl> + &PredsWithAvailableValue) { + Value *NewValue = nullptr; + SmallVector NewPHIs; + SSAUpdater SSAUpdate(&NewPHIs); + SSAUpdate.Initialize(LI->getType(), LI->getName()); + + for (const auto &P : PredsWithAvailableValue) { + BasicBlock *PredBB = P.first; + Instruction *DepI = P.second; + + if (SSAUpdate.HasValueForBlock(PredBB)) + continue; + + // Get the incoming value for this block. This value is the value that we + // extract from the corresponding depending instruction. + NewValue = getExtractedValue(LI, DepI); + // Match the coerced value with the corresponding incoming block. + SSAUpdate.AddAvailableValue(PredBB, NewValue); + } + + // Generate the phi node. + NewValue = SSAUpdate.GetValueInMiddleOfBlock(InsertBB); + // Run value numbering for the new phi node. + if (Instruction *I = dyn_cast(NewValue)) + runValueNumberingForLoadCoercionInsns(I); + + return NewValue; +} + +// Check if the load can be removed and replace it with a phi node. In Example +// 1, all the predecessors have a depending instruction. Therefore, the load is +// replaced by a phi node whose incoming values are extracted from each +// depending instruction. +// +// Example 1: +// Before Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P store i32 500, i32* %P +// \ / +// BB3: +// %V = load i32, i32* %P +// +// After Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P store i32 500, i32* %P +// \ / +// BB3: +// %phi = phi i32 [ 100, %BB1], [ 500, %BB2 ] +// +// In example 2, there is only one depending instruction in BB1. We eliminate +// the load of BB3 by adding an artificial dependency in BB2. This is done by +// adding a new load (%V1) in BB2. Now, the load of BB3 has two dependencies. 
+// Therefore, we can replace it with a phi node as it is shown below: +// +// Example 2: +// Before Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P | +// \ / +// BB3: +// %V = load i32, i32* %P +// +// After Load Coercion: +// BB1: BB2: +// store i32 100, i32* %P %V1 = load i32, i32* %P +// \ / +// BB3: +// %phi = phi i32 [ 100, %BB1], [ %V2, %BB2 ] +// +Value *NewGVN::tryReplaceLoadWithPhi( + LoadInst *LI, BasicBlock *InsertBB, + SmallVectorImpl> &PhiOperands, + SmallVectorImpl &IncomingBlocksWithoutDep) { + + // If we have found all the phi operands (Example 1), then we are ready to + // replace the load with a phi node. + if (PhiOperands.size() == pred_size(InsertBB)) + return emitLoadCoercionPhi(LI, InsertBB, PhiOperands); + + // If there are more than one predecessors without a depending instruction, + // then we do not perform load coercion. + // TODO: Create a new common predecessor and emit a new load in the common + // predecessor. + if (IncomingBlocksWithoutDep.size() > 1) + return nullptr; + + BasicBlock *IncomingBlock = IncomingBlocksWithoutDep.back(); + // Do not add a new load in EHPad that does not allow non-phi instructions. + if (IncomingBlock->getTerminator()->isEHPad()) + return nullptr; + + // Do not add a new load inside a loop. + // TODO: Create a new basic block between the loop latch and the + // InsertBB. + if (DT->dominates(InsertBB, IncomingBlock)) + return nullptr; + + // TODO: Add support for the case where IncomingBlock has more than one + // successors. + if (succ_size(IncomingBlock) != 1) + return nullptr; + + // Emit a new load in the IncomingBlock and call emitLoadCoercionPhi to + // replace the Li with a new phi node. + Value *LIPtr = LI->getPointerOperand(); + SmallVector NewInsts; + PHITransAddr Address(LIPtr, DL, AC); + // Emits the pointer of the new load if it is not available in the incoming + // block. 
+ LIPtr = + Address.PHITranslateWithInsertion(InsertBB, IncomingBlock, *DT, NewInsts); + if (!LIPtr) + return nullptr; + + // Generate a new load instruction in the incoming block. + auto *NewDependingLoad = new LoadInst( + LI->getType(), LIPtr, LI->getName(), LI->isVolatile(), LI->getAlign(), + LI->getOrdering(), LI->getSyncScopeID(), IncomingBlock->getTerminator()); + + NewInsts.push_back(NewDependingLoad); + + // Update the debug information of the new load. + NewDependingLoad->setDebugLoc(LI->getDebugLoc()); + + // Update MemorySSA with the new load instruction. + updateMemorySSA(LI, NewDependingLoad); + + // Add the basic block in the vector with the basic blocks that have a + // depending instruction. + PhiOperands.push_back(std::make_pair(IncomingBlock, NewDependingLoad)); + for (auto *CurI : NewInsts) + runValueNumberingForLoadCoercionInsns(CurI); + + // Create a phi node. + return emitLoadCoercionPhi(LI, InsertBB, PhiOperands); +} + // Iterate over the load instructions of LoadCoercion map and it replaces // them with the right sequence of instructions. bool NewGVN::implementLoadCoercion() { @@ -4104,11 +4380,98 @@ SmallPtrSet DependingInsns = P.second; Value *NewValue = nullptr; Instruction *FirstDepI = *DependingInsns.begin(); + MemoryAccess *OriginalAccess = getMemoryAccess(LI); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); - if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI)) + // Check whether the load should be replaced with a phi node or we should + // just extract the correct value from the depending instruction. + // + // We do not replace the load with a phi node if there is only one + // depending instruction and it dominates the load as it is shown in the + // following two examples: + // + // BB1: BB1: + // store i32 100, i32* %P store i32 100, i32* %P + // ... 
/ \ + // load i32, i32 *%P BB2: BB3: + // \ / + // BB4: + // load i32, i32* %P + // + if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI) && + !isa(DefiningAccess)) // Extract the correct value from the depending instruction. NewValue = getExtractedValue(LI, FirstDepI); + else { + // Before we replace the load with a phi node, we should find the operands + // of the phi node. In Example 1, it is straightforward that the operands + // of the new phi node are (100, %BB1) and (500, %BB2). + // + // Example 1: + // BB1: BB2: + // store i32 100, i32* %P store i32 500, i32* %P + // \ / + // BB3: + // %V = load i32, i32* %P + // + // However, this is not the case in Example 2. The operands of the new phi + // node should be (100, %BB2) and (%V1, %BB3). Hence, the incoming value + // might not be in an incoming block. But, the incoming value can also be + // in a basic block that dominates the incoming block. + // BB1: + // %V1 = load i32, i32* %P + // / \ + // BB2: BB3: + // store i32 100, i32* %P | + // \ / + // BB4: + // %V2 = load i32, i32* %P + // + // Match the incoming values to the corresponding incoming blocks. + BasicBlock *InsertBB = isa(DefiningAccess) + ? DefiningAccess->getBlock() + : LI->getParent(); + SmallVector, 2> + IncomingBlocksWithDep; + SmallVector IncomingBlocksWithoutDep; + SmallVector LIPredBBs; + for (BasicBlock *PredBB : predecessors(InsertBB)) + LIPredBBs.push_back(PredBB); + SmallPtrSet LIDependingInsns; + for (Instruction *DepI : DependingInsns) + LIDependingInsns.insert(DepI); + // First, we find the incoming values that belong to the predecessors of + // the candidate load. 
+ for (Instruction *DepI : llvm::make_early_inc_range(LIDependingInsns)) { + BasicBlock *DepIBB = DepI->getParent(); + auto It = llvm::find_if(LIPredBBs, [DepIBB](BasicBlock *PredBB) { + return DepIBB == PredBB; + }); + if (It != LIPredBBs.end()) { + IncomingBlocksWithDep.push_back(std::make_pair(DepIBB, DepI)); + LIPredBBs.erase(It); + LIDependingInsns.erase(DepI); + } + } + + // Next, we find the incoming values that belong to basic blocks that + // dominate one of the predecessors of the candidate load. + for (BasicBlock *PredBB : LIPredBBs) { + auto It = + llvm::find_if(LIDependingInsns, [PredBB, this](Instruction *DepI) { + return DT->dominates(DepI->getParent(), PredBB); + }); + + if (It != LIDependingInsns.end()) + IncomingBlocksWithDep.push_back(std::make_pair(PredBB, *It)); + else + IncomingBlocksWithoutDep.push_back(PredBB); + } + NewValue = tryReplaceLoadWithPhi(LI, InsertBB, IncomingBlocksWithDep, + IncomingBlocksWithoutDep); + } // If we could not eliminate the load, then we need to create a load // expression for the load and run value numbering in order to add it in the // correct congruence class. @@ -4121,6 +4484,12 @@ DefiningAccess); valueNumberInstruction(LI); updateProcessedCount(LI); + for (Use &U : LI->uses()) { + if (auto *II = dyn_cast(U.getUser())) { + valueNumberInstruction(II); + updateProcessedCount(II); + } + } continue; } @@ -4133,9 +4502,15 @@ LI->replaceAllUsesWith(NewValue); // Run value numbering for the uses of the load after updating them with the // new value. In this way, we might be able to eliminate them. - for (Instruction *User : LIUses) { - valueNumberInstruction(User); - updateProcessedCount(User); + for (Instruction *I : LIUses) { + valueNumberInstruction(I); + updateProcessedCount(I); + for (Use &U : I->uses()) { + if (auto *II = dyn_cast(U.getUser())) { + valueNumberInstruction(II); + updateProcessedCount(II); + } + } } // Update the name of the phi node if we generated one. 
if (isa(NewValue)) @@ -4148,7 +4523,6 @@ << *NewValue << "\n"); AnythingReplaced = true; } - return AnythingReplaced; } @@ -4568,6 +4942,8 @@ AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } }; @@ -4577,6 +4953,7 @@ if (skipFunction(F)) return false; return NewGVN(F, &getAnalysis().getDomTree(), + &getAnalysis().getPostDomTree(), &getAnalysis().getAssumptionCache(F), &getAnalysis().getTLI(F), &getAnalysis().getAAResults(), @@ -4592,6 +4969,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) @@ -4607,12 +4985,13 @@ // the same order here, just in case. auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); + auto &PDT = AM.getResult(F); auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &MSSA = AM.getResult(F).getMSSA(); - bool Changed = - NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout()) - .runGVN(); + bool Changed = NewGVN(F, &DT, &PDT, &AC, &TLI, &AA, &MSSA, + F.getParent()->getDataLayout()) + .runGVN(); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll b/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll --- a/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll +++ b/llvm/test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -248,7 +248,7 @@ define i8 @irreducible_memoryphi(i8* noalias %arg, i8* noalias %arg2) { ; CHECK-LABEL: @irreducible_memoryphi( ; CHECK-NEXT: bb: -; CHECK-NEXT: store i8 0, i8* [[ARG:%.*]] +; CHECK-NEXT: store i8 0, i8* [[ARG:%.*]], align 1 ; CHECK-NEXT: br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB2]] diff --git 
a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll --- a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll +++ b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll @@ -10,31 +10,18 @@ ; vv ; Exit ; -; OLDGVN-LABEL: @test1( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; OLDGVN: T: -; OLDGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br label [[EXIT:%.*]] -; OLDGVN: F: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[T]] ] -; OLDGVN-NEXT: ret i32 [[V2]] -; -; NEWGVN-LABEL: @test1( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br label [[EXIT:%.*]] -; NEWGVN: F: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: ret i32 [[V2]] +; GVN-LABEL: @test1( +; GVN-NEXT: Entry: +; GVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 +; GVN-NEXT: br label [[EXIT:%.*]] +; GVN: F: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[T]] ] +; GVN-NEXT: ret i32 [[V2]] ; Entry: br i1 %Cond, label %T, label %F @@ -87,11 +74,10 @@ ; NEWGVN-NEXT: store float 1.000000e+00, float* [[P1]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ] +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i8 [ 0, [[F]] ], [ 84, [[T]] ] +; NEWGVN-NEXT: [[V2:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ] ; 
NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1 -; NEWGVN-NEXT: [[V3:%.*]] = add i8 [[V2]], [[PHI]] -; NEWGVN-NEXT: ret i8 [[V3]] +; NEWGVN-NEXT: ret i8 [[PHIOFOPS]] ; Entry: %P1 = bitcast i32* %P to float* @@ -145,9 +131,10 @@ ; NEWGVN-NEXT: store i32 42, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V41:%.*]] = load i8, i8* [[P4]], align 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V4:%.*]] = load i8, i8* [[P4]], align 1 +; NEWGVN-NEXT: [[V4:%.*]] = phi i8 [ [[V41]], [[F]] ], [ 0, [[T]] ] ; NEWGVN-NEXT: ret i8 [[V4]] ; Entry: @@ -260,10 +247,11 @@ ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: F2: +; NEWGVN-NEXT: [[V11:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V1:%.*]] = phi i32 [ 42, [[T]] ], [ 13, [[F1]] ], [ [[V11]], [[F2]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 1, [[T]] ], [ 2, [[F1]] ], [ 3, [[F2]] ] -; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V2:%.*]] = add i32 [[PHI]], [[V1]] ; NEWGVN-NEXT: ret i32 [[V2]] ; @@ -300,27 +288,16 @@ ; v v ; Exit ; -; OLDGVN-LABEL: @test6( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] -; OLDGVN: Loop: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V:%.*]] = phi i32 [ 13, [[LOOP]] ], [ 42, [[ENTRY:%.*]] ] -; OLDGVN-NEXT: ret i32 [[V]] -; -; NEWGVN-LABEL: @test6( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] -; NEWGVN: Loop: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] -; NEWGVN: 
Exit: -; NEWGVN-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: ret i32 [[V]] +; GVN-LABEL: @test6( +; GVN-NEXT: Entry: +; GVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 +; GVN-NEXT: br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] +; GVN: Loop: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V:%.*]] = phi i32 [ 13, [[LOOP]] ], [ 42, [[ENTRY:%.*]] ] +; GVN-NEXT: ret i32 [[V]] ; Entry: store i32 42, i32* %P @@ -376,12 +353,12 @@ ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[INNER_LOOP]], label [[OUTER_LOOP_LATCH]] ; NEWGVN: Outer.Loop.Latch: +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 242, [[OUTER_LOOP]] ], [ 113, [[INNER_LOOP]] ] +; NEWGVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[INNER_LOOP]] ], [ 42, [[OUTER_LOOP]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 100, [[INNER_LOOP]] ], [ 200, [[OUTER_LOOP]] ] -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V3:%.*]] = add i32 [[V2]], [[PHI]] ; NEWGVN-NEXT: br i1 [[COND3:%.*]], label [[OUTER_LOOP]], label [[EXIT:%.*]] ; NEWGVN: Exit: -; NEWGVN-NEXT: ret i32 [[V3]] +; NEWGVN-NEXT: ret i32 [[PHIOFOPS]] ; Entry: br label %Outer.Loop @@ -440,14 +417,17 @@ ; NEWGVN: T: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i32> [[V1]], i64 1 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[P2_PHI_TRANS_INSERT:%.*]] = bitcast i8* [[P]] to i32* +; NEWGVN-NEXT: [[V21:%.*]] = load i32, i32* [[P2_PHI_TRANS_INSERT]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V2:%.*]] = phi i32 [ [[V21]], [[F]] ], [ [[TMP1]], [[T]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 
[ 100, [[F]] ], [ [[V3]], [[T]] ] -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32* -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 ; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[V2]], [[PHI]] ; NEWGVN-NEXT: ret i32 [[V4]] ; @@ -503,15 +483,18 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: +; NEWGVN-NEXT: [[V41:%.*]] = load i32, i32* [[P:%.*]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>* +; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to <2 x i32>* ; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V4:%.*]] = phi i32 [ [[V41]], [[T]] ], [ [[TMP1]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[T]] ], [ [[V3]], [[F]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[V4]], [[PHI]] ; NEWGVN-NEXT: ret i32 [[V5]] ; @@ -573,11 +556,13 @@ ; NEWGVN: F: ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to <2 x i32>* ; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V4:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[TMP1]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[V3]], [[F]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[PHI]] ; NEWGVN-NEXT: [[V6:%.*]] = add i32 [[V5]], [[V4]] ; NEWGVN-NEXT: ret i32 [[V6]] @@ -731,14 +716,16 @@ ; NEWGVN: T: ; NEWGVN-NEXT: 
[[P1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V3:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ 13, [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[PHI]], [[V3]] ; NEWGVN-NEXT: ret i32 [[V4]] ; @@ -796,10 +783,8 @@ ; NEWGVN-NEXT: store i32 100, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 13, [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[PHI]], [[V3]] -; NEWGVN-NEXT: ret i32 [[V4]] +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 200, [[F]] ], [ 26, [[T]] ] +; NEWGVN-NEXT: ret i32 [[PHIOFOPS]] ; Entry: %P1 = bitcast i32* %P to <4 x i32>* @@ -830,27 +815,16 @@ ; v v ; Exit ; -; OLDGVN-LABEL: @test15( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] -; OLDGVN: BB: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V1:%.*]] = phi i32 [ 13, [[BB]] ], [ 42, [[ENTRY:%.*]] ] -; OLDGVN-NEXT: ret i32 [[V1]] -; -; NEWGVN-LABEL: @test15( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] -; NEWGVN: BB: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[V1:%.*]] = load i32, 
i32* [[P]], align 4 -; NEWGVN-NEXT: ret i32 [[V1]] +; GVN-LABEL: @test15( +; GVN-NEXT: Entry: +; GVN-NEXT: store i32 42, i32* [[P:%.*]], align 4 +; GVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] +; GVN: BB: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V1:%.*]] = phi i32 [ 13, [[BB]] ], [ 42, [[ENTRY:%.*]] ] +; GVN-NEXT: ret i32 [[V1]] ; Entry: store i32 42, i32* %P @@ -894,14 +868,16 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] ; NEWGVN: BB: ; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY:%.*]] ], [ 100, [[BB]] ] -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[V3:%.*]] = phi i32 [ [[TMP1]], [[ENTRY:%.*]] ], [ 13, [[BB]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY]] ], [ 100, [[BB]] ] ; NEWGVN-NEXT: [[V4:%.*]] = add i32 [[PHI]], [[V3]] ; NEWGVN-NEXT: ret i32 [[V4]] ; @@ -1010,6 +986,8 @@ ; NEWGVN-NEXT: Entry: ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32 ; NEWGVN-NEXT: [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: @@ -1019,8 +997,8 @@ ; NEWGVN-NEXT: store i32 100, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V4:%.*]] = phi i32 [ 
[[TMP1]], [[T]] ], [ 100, [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 13, [[F]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] ; NEWGVN-NEXT: ret i32 [[V5]] ; @@ -1082,10 +1060,10 @@ ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = trunc i32 [[V]] to i16 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i16* -; NEWGVN-NEXT: [[V2:%.*]] = load i16, i16* [[P2]], align 2 +; NEWGVN-NEXT: [[V2:%.*]] = phi i16 [ [[TMP0]], [[F]] ], [ 13, [[T]] ] ; NEWGVN-NEXT: ret i16 [[V2]] ; Entry: @@ -1208,9 +1186,10 @@ ; NEWGVN-NEXT: store i32 42, i32* [[P1:%.*]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V11:%.*]] = load i32, i32* [[P1]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1]], align 4 +; NEWGVN-NEXT: [[V1:%.*]] = phi i32 [ [[V11]], [[F]] ], [ 42, [[T]] ] ; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2:%.*]], align 4 ; NEWGVN-NEXT: [[V3:%.*]] = add i32 [[V1]], [[V2]] ; NEWGVN-NEXT: ret i32 [[V3]] @@ -1262,13 +1241,15 @@ ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: store i32 42, i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[V23:%.*]] = load i32, i32* [[P2:%.*]], align 4 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: -; NEWGVN-NEXT: store i32 13, i32* [[P2:%.*]], align 4 +; NEWGVN-NEXT: store i32 13, i32* [[P2]], align 4 +; NEWGVN-NEXT: [[V11:%.*]] = load i32, i32* [[P1]], align 4 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1]], align 4 -; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4 +; NEWGVN-NEXT: [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V23]], [[T]] ] +; NEWGVN-NEXT: [[V1:%.*]] = phi i32 [ [[V11]], [[F]] ], [ 42, [[T]] ] ; NEWGVN-NEXT: [[V3:%.*]] = 
add i32 [[V1]], [[V2]] ; NEWGVN-NEXT: ret i32 [[V3]] ; @@ -1384,11 +1365,12 @@ ; NEWGVN: F: ; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]] ; NEWGVN: F1: +; NEWGVN-NEXT: [[V31:%.*]] = load i32, i32* [[P]], align 4 ; NEWGVN-NEXT: br label [[BB]] ; NEWGVN: F2: ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: BB: -; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[V3:%.*]] = phi i32 [ [[V31]], [[F1]] ], [ 42, [[T]] ] ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: ; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[V3]], [[BB]] ], [ 100, [[F2]] ] @@ -1457,14 +1439,17 @@ ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4 +; NEWGVN-NEXT: [[TMP0:%.*]] = lshr i32 [[V1]], 16 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 ; NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[P2_PHI_TRANS_INSERT:%.*]] = bitcast i32* [[P1]] to i8* +; NEWGVN-NEXT: [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, i8* [[P2_PHI_TRANS_INSERT]], i32 2 +; NEWGVN-NEXT: [[V21:%.*]] = load i8, i8* [[GEP_PHI_TRANS_INSERT]], align 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: +; NEWGVN-NEXT: [[V2:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V21]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[P2]], i32 2 -; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[GEP]], align 1 ; NEWGVN-NEXT: [[V3:%.*]] = add i8 [[PHI]], [[V2]] ; NEWGVN-NEXT: ret i8 [[V3]] ; @@ -1522,16 +1507,17 @@ ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; NEWGVN: T: ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4 -; NEWGVN-NEXT: [[V2:%.*]] = lshr i32 [[V1]], 16 -; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V2]] to i8 +; NEWGVN-NEXT: [[TMP0:%.*]] = lshr i32 [[V1]], 16 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 ; 
NEWGVN-NEXT: br label [[EXIT:%.*]] ; NEWGVN: F: +; NEWGVN-NEXT: [[P2_PHI_TRANS_INSERT:%.*]] = bitcast i32* [[P1]] to i8* +; NEWGVN-NEXT: [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, i8* [[P2_PHI_TRANS_INSERT]], i32 2 +; NEWGVN-NEXT: [[V41:%.*]] = load i8, i8* [[GEP_PHI_TRANS_INSERT]], align 1 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ [[V3]], [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8* -; NEWGVN-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[P2]], i32 2 -; NEWGVN-NEXT: [[V4:%.*]] = load i8, i8* [[GEP]], align 1 +; NEWGVN-NEXT: [[V4:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V41]], [[F]] ] +; NEWGVN-NEXT: [[PHI:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ 100, [[F]] ] ; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[PHI]], [[V4]] ; NEWGVN-NEXT: ret i8 [[V5]] ; @@ -1667,47 +1653,26 @@ ; v v v ; Exit ; -; OLDGVN-LABEL: @test28( -; OLDGVN-NEXT: Entry: -; OLDGVN-NEXT: br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]] -; OLDGVN: T: -; OLDGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; OLDGVN-NEXT: br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; OLDGVN: F: -; OLDGVN-NEXT: store i32 13, i32* [[P]], align 4 -; OLDGVN-NEXT: br label [[EXIT:%.*]] -; OLDGVN: BB1: -; OLDGVN-NEXT: [[V2:%.*]] = add i32 [[V1]], 100 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: BB2: -; OLDGVN-NEXT: [[V3:%.*]] = add i32 [[V1]], 13 -; OLDGVN-NEXT: br label [[EXIT]] -; OLDGVN: Exit: -; OLDGVN-NEXT: [[V4:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[BB1]] ], [ [[V1]], [[BB2]] ] -; OLDGVN-NEXT: [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ] -; OLDGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] -; OLDGVN-NEXT: ret i32 [[V5]] -; -; NEWGVN-LABEL: @test28( -; NEWGVN-NEXT: Entry: -; NEWGVN-NEXT: br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]] -; NEWGVN: T: -; NEWGVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 -; NEWGVN-NEXT: br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] 
-; NEWGVN: F: -; NEWGVN-NEXT: store i32 13, i32* [[P]], align 4 -; NEWGVN-NEXT: br label [[EXIT:%.*]] -; NEWGVN: BB1: -; NEWGVN-NEXT: [[V2:%.*]] = add i32 [[V1]], 100 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: BB2: -; NEWGVN-NEXT: [[V3:%.*]] = add i32 [[V1]], 13 -; NEWGVN-NEXT: br label [[EXIT]] -; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ] -; NEWGVN-NEXT: [[V4:%.*]] = load i32, i32* [[P]], align 4 -; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] -; NEWGVN-NEXT: ret i32 [[V5]] +; GVN-LABEL: @test28( +; GVN-NEXT: Entry: +; GVN-NEXT: br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]] +; GVN: T: +; GVN-NEXT: store i32 [[V1:%.*]], i32* [[P:%.*]], align 4 +; GVN-NEXT: br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; GVN: F: +; GVN-NEXT: store i32 13, i32* [[P]], align 4 +; GVN-NEXT: br label [[EXIT:%.*]] +; GVN: BB1: +; GVN-NEXT: [[V2:%.*]] = add i32 [[V1]], 100 +; GVN-NEXT: br label [[EXIT]] +; GVN: BB2: +; GVN-NEXT: [[V3:%.*]] = add i32 [[V1]], 13 +; GVN-NEXT: br label [[EXIT]] +; GVN: Exit: +; GVN-NEXT: [[V4:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[BB1]] ], [ [[V1]], [[BB2]] ] +; GVN-NEXT: [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ] +; GVN-NEXT: [[V5:%.*]] = add i32 [[PHI]], [[V4]] +; GVN-NEXT: ret i32 [[V5]] ; Entry: br i1 %Cond1, label %T, label %F @@ -2078,10 +2043,11 @@ ; NEWGVN-NEXT: store i64 [[INDEX]], i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V11:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: +; NEWGVN-NEXT: [[V1:%.*]] = phi i64 [ [[INDEX]], [[T]] ], [ [[V11]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ 100, [[T]] ], [ 50, [[F]] ] -; NEWGVN-NEXT: [[V1:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: [[V2:%.*]] = add i64 [[V1]], [[PHI]] ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i64 
[[INDEX_INC]], [[TC:%.*]] @@ -2260,10 +2226,11 @@ ; NEWGVN-NEXT: [[V1:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: F: +; NEWGVN-NEXT: [[V21:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: +; NEWGVN-NEXT: [[V2:%.*]] = phi i64 [ [[V1]], [[T]] ], [ [[V21]], [[F]] ] ; NEWGVN-NEXT: [[PHI:%.*]] = phi i64 [ [[V1]], [[T]] ], [ 100, [[F]] ] -; NEWGVN-NEXT: [[V2:%.*]] = load i64, i64* [[P1]], align 4 ; NEWGVN-NEXT: [[V3:%.*]] = add i64 [[PHI]], [[V2]] ; NEWGVN-NEXT: [[INDEX_INC]] = add i64 [[INDEX]], 1 ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]] @@ -2364,6 +2331,7 @@ ret i64 %V4 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test39(i64* %P, i1 %Cond1, i64 %TC) { ; Entry ; | @@ -2458,6 +2426,7 @@ ret i64 %V5 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test40(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -2981,6 +2950,7 @@ ret i64 %V9 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test45(i64* %P, i64 %TC1, i1 %Cond1, i64 %TC2) { ; Entry _ ; | / | @@ -3105,6 +3075,7 @@ ret i64 %V9 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test46(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -3170,6 +3141,7 @@ ret i64 %V5 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test47(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -3236,6 +3208,7 @@ ret i64 %V5 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test48(i64* %P, i64 %TC) { ; Entry _ ; | / | @@ -3389,6 +3362,7 @@ ret void } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test50(i64* %P, i64 %TC1, i1 %Cond1, i64 %TC2) { ; Entry _ ; | / | @@ -3516,6 +3490,7 @@ ret i64 %V9 } +; TODO: Add support for partial redundant load elimination for loops. 
define i64 @test51(i64* %P, i64 %TC, i1 %Cond) { ; Entry ; | @@ -3714,6 +3689,7 @@ ret i64 %V3 } +; TODO: Add support for partial redundant load elimination for loops. define i64 @test53(i64* %P, i64 %TC, i1 %Cond1) { ; Entry ; | @@ -3768,11 +3744,14 @@ ; NEWGVN: BB1: ; NEWGVN-NEXT: [[I1:%.*]] = insertelement <4 x i64> [[V1]], i64 [[INDEX]], i32 1 ; NEWGVN-NEXT: store <4 x i64> [[I1]], <4 x i64>* [[P1]], align 32 +; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[I1]] to i256 +; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64 ; NEWGVN-NEXT: br label [[BB3:%.*]] ; NEWGVN: BB2: +; NEWGVN-NEXT: [[V21:%.*]] = load i64, i64* [[P]], align 4 ; NEWGVN-NEXT: br label [[BB3]] ; NEWGVN: BB3: -; NEWGVN-NEXT: [[V2:%.*]] = load i64, i64* [[P]], align 4 +; NEWGVN-NEXT: [[V2:%.*]] = phi i64 [ [[V21]], [[BB2]] ], [ [[TMP1]], [[BB1]] ] ; NEWGVN-NEXT: [[V3:%.*]] = add i64 [[V2]], [[INDEX]] ; NEWGVN-NEXT: br label [[LOOP_LATCH]] ; NEWGVN: Loop.Latch: @@ -3854,14 +3833,14 @@ ; NEWGVN: BB2: ; NEWGVN-NEXT: br label [[BB3]] ; NEWGVN: BB3: +; NEWGVN-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 10, [[BB2]] ], [ 15, [[BB1]] ] ; NEWGVN-NEXT: [[PHI1:%.*]] = phi i32 [ 10, [[BB1]] ], [ 5, [[BB2]] ] ; NEWGVN-NEXT: [[COND2:%.*]] = icmp ne i32 [[V1:%.*]], 0 ; NEWGVN-NEXT: br i1 [[COND2]], label [[BB4:%.*]], label [[EXIT:%.*]] ; NEWGVN: BB4: -; NEWGVN-NEXT: [[V6:%.*]] = add nsw i32 [[PHI1]], 5 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[BB3]] ], [ [[V6]], [[BB4]] ] +; NEWGVN-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[BB3]] ], [ [[PHIOFOPS]], [[BB4]] ] ; NEWGVN-NEXT: ret i32 [[PHI2]] ; Entry: @@ -3936,8 +3915,7 @@ ; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 ; NEWGVN-NEXT: br label [[EXIT]] ; NEWGVN: Exit: -; NEWGVN-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ] -; NEWGVN-NEXT: [[V5:%.*]] = load i32, i32* [[P]], align 4 +; NEWGVN-NEXT: [[V5:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ] ; NEWGVN-NEXT: 
ret i32 [[V5]] ; Entry: @@ -4048,4 +4026,80 @@ ret i32 %V4 } +define internal fastcc i32 @test58() { +; Entry +; | +; BB1 +; / | +; BB2 | +; \ | +; BB3 +; / | +; BB4 | +; \ | +; v v +; Exit +; +; OLDGVN-LABEL: @test58( +; OLDGVN-NEXT: Entry: +; OLDGVN-NEXT: store i32 0, i32* null, align 8 +; OLDGVN-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] +; OLDGVN: BB2: +; OLDGVN-NEXT: store i32 0, i32* null, align 8 +; OLDGVN-NEXT: br label [[BB3]] +; OLDGVN: BB3: +; OLDGVN-NEXT: br i1 false, label [[BB4:%.*]], label [[EXIT:%.*]] +; OLDGVN: BB4: +; OLDGVN-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 0 to i64 +; OLDGVN-NEXT: br label [[EXIT]] +; OLDGVN: Exit: +; OLDGVN-NEXT: ret i32 0 +; +; NEWGVN-LABEL: @test58( +; NEWGVN-NEXT: Entry: +; NEWGVN-NEXT: store i32 0, i32* null, align 8 +; NEWGVN-NEXT: br label [[BB1:%.*]] +; NEWGVN: BB1: +; NEWGVN-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] +; NEWGVN: BB2: +; NEWGVN-NEXT: store i8 poison, i8* null, align 1 +; NEWGVN-NEXT: br label [[BB3]] +; NEWGVN: BB3: +; NEWGVN-NEXT: [[V2:%.*]] = tail call i32 @llvm.smax.i32(i32 0, i32 0) +; NEWGVN-NEXT: [[COND:%.*]] = icmp sgt i32 [[V2]], 0 +; NEWGVN-NEXT: br i1 [[COND]], label [[BB4:%.*]], label [[EXIT:%.*]] +; NEWGVN: BB4: +; NEWGVN-NEXT: br label [[EXIT]] +; NEWGVN: Exit: +; NEWGVN-NEXT: ret i32 0 +; +Entry: + store i32 0, i32* null, align 8 + br label %BB1 + +BB1: + br i1 false, label %BB2, label %BB3 + +BB2: + store i32 0, i32* null, align 8 + br label %BB3 + +BB3: + %V1 = load i32, i32* null, align 4 + %V2 = tail call i32 @llvm.smax.i32(i32 %V1, i32 0) + %Cond = icmp sgt i32 %V2, 0 + br i1 %Cond, label %BB4, label %Exit + +BB4: + %wide.trip.count = zext i32 %V2 to i64 + br label %Exit + +Exit: + ret i32 0 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.smax.i32(i32, i32) #1 + attributes #0 = { readnone } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git 
a/llvm/test/Transforms/NewGVN/pr31483.ll b/llvm/test/Transforms/NewGVN/pr31483.ll --- a/llvm/test/Transforms/NewGVN/pr31483.ll +++ b/llvm/test/Transforms/NewGVN/pr31483.ll @@ -10,20 +10,20 @@ ; CHECK-LABEL: @ham( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: store i8* %arg1, i8** [[TMP]], align 8 -; CHECK-NEXT: br label %bb2 +; CHECK-NEXT: store i8* [[ARG1:%.*]], i8** [[TMP]], align 8 +; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP3:%.*]] = phi i8* [ %arg, %bb ], [ %tmp7, %bb22 ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i8* [ [[ARG:%.*]], [[BB:%.*]] ], [ [[TMP7:%.*]], [[BB22:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %bb6, label %bb23 +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7]] = getelementptr inbounds i8, i8* [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP4]] to i32 -; CHECK-NEXT: switch i32 [[TMP9]], label %bb22 [ -; CHECK-NEXT: i32 115, label %bb10 -; CHECK-NEXT: i32 105, label %bb16 -; CHECK-NEXT: i32 99, label %bb16 +; CHECK-NEXT: switch i32 [[TMP9]], label [[BB22]] [ +; CHECK-NEXT: i32 115, label [[BB10:%.*]] +; CHECK-NEXT: i32 105, label [[BB16:%.*]] +; CHECK-NEXT: i32 99, label [[BB16]] ; CHECK-NEXT: ] ; CHECK: bb10: ; CHECK-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP]], align 8 @@ -32,16 +32,16 @@ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i8** ; CHECK-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = call signext i32 (i8*, ...) 
@zot(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @global, i32 0, i32 0), i8* [[TMP14]]) -; CHECK-NEXT: br label %bb22 +; CHECK-NEXT: br label [[BB22]] ; CHECK: bb16: ; CHECK-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 8 ; CHECK-NEXT: store i8* [[TMP18]], i8** [[TMP]], align 8 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 4 ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* -; CHECK-NEXT: br label %bb22 +; CHECK-NEXT: br label [[BB22]] ; CHECK: bb22: -; CHECK-NEXT: br label %bb2 +; CHECK-NEXT: br label [[BB2]] ; CHECK: bb23: ; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.va_end(i8* [[TMP24]]) diff --git a/llvm/test/Transforms/NewGVN/pr31613.ll b/llvm/test/Transforms/NewGVN/pr31613.ll --- a/llvm/test/Transforms/NewGVN/pr31613.ll +++ b/llvm/test/Transforms/NewGVN/pr31613.ll @@ -73,7 +73,7 @@ define void @e(i32 %a0, i32 %a1, %struct.a** %p2) { ; CHECK-LABEL: @e( -; CHECK-NEXT: [[F:%.*]] = alloca i32 +; CHECK-NEXT: [[F:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A0:%.*]], i32* [[F]], align 4, !g !0 ; CHECK-NEXT: br label [[H:%.*]] ; CHECK: h: diff --git a/llvm/test/Transforms/NewGVN/pr32836.ll b/llvm/test/Transforms/NewGVN/pr32836.ll --- a/llvm/test/Transforms/NewGVN/pr32836.ll +++ b/llvm/test/Transforms/NewGVN/pr32836.ll @@ -5,21 +5,23 @@ @b = external global %struct.anon define void @tinkywinky(i1 %patatino) { ; CHECK-LABEL: @tinkywinky( -; CHECK-NEXT: store i32 8, i32* null +; CHECK-NEXT: store i32 8, i32* null, align 4 ; CHECK-NEXT: br i1 [[PATATINO:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: br label [[L:%.*]] ; CHECK: L: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* null, align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* null -; CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, i32* getelementptr 
inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP11]], [[L]] ], [ 8, [[TMP0:%.*]] ] +; CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_ANON:%.*]], %struct.anon* @b, i64 0, i32 0), align 4 ; CHECK-NEXT: [[BF_VALUE:%.*]] = and i32 [[TMP1]], 536870911 ; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -536870912 ; CHECK-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -; CHECK-NEXT: store i32 [[BF_SET]], i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0) +; CHECK-NEXT: store i32 [[BF_SET]], i32* getelementptr inbounds ([[STRUCT_ANON]], %struct.anon* @b, i64 0, i32 0), align 4 ; CHECK-NEXT: br label [[LOR_END:%.*]] ; CHECK: lor.end: +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[BF_SET]], 536870911 ; CHECK-NEXT: br label [[L]] ; store i32 8, i32* null diff --git a/llvm/test/Transforms/NewGVN/pr32934.ll b/llvm/test/Transforms/NewGVN/pr32934.ll --- a/llvm/test/Transforms/NewGVN/pr32934.ll +++ b/llvm/test/Transforms/NewGVN/pr32934.ll @@ -1,39 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=newgvn %s | FileCheck %s -; CHECK: define void @tinkywinky() { -; CHECK-NEXT: entry: -; CHECK-NEXT: %d = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* null, align 4 -; CHECK-NEXT: br label %for.cond -; CHECK: for.cond: ; preds = %if.end, %entry -; CHECK-NEXT: %0 = load i32, i32* null, align 4 -; CHECK-NEXT: %cmp = icmp slt i32 %0, 1 -; CHECK-NEXT: br i1 %cmp, label %for.body, label %while.cond -; CHECK: for.body: ; preds = %for.cond -; CHECK-NEXT: %1 = load i32, i32* @a, align 4 -; CHECK-NEXT: store i32 %1, i32* %d, align 4 -; CHECK-NEXT: br label %L -; CHECK: L: ; preds = %if.then, %for.body -; CHECK-NEXT: %tobool = icmp ne i32 %1, 0 -; CHECK-NEXT: br i1 %tobool, label %if.then, label %if.end -; CHECK: if.then: ; preds = %L -; CHECK-NEXT: call void (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) -; CHECK-NEXT: br label %L -; CHECK: if.end: ; preds = %L -; CHECK-NEXT: br label %for.cond -; CHECK: while.cond: ; preds = %while.body, %for.cond -; CHECK-NEXT: br i1 undef, label %while.body, label %while.end -; CHECK: while.body: ; preds = %while.cond -; CHECK-NEXT: call void (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) -; CHECK-NEXT: br label %while.cond -; CHECK: while.end: -; CHECK-NEXT: %2 = load i32, i32* @a, align 4 -; CHECK-NEXT: store i32 %2, i32* undef, align 4 -; CHECK-NEXT: ret void - @a = external global i32, align 4 @patatino = external unnamed_addr constant [2 x i8], align 1 define void @tinkywinky() { +; CHECK-LABEL: @tinkywinky( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 0, i32* null, align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[WHILE_COND:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[D]], align 4 +; CHECK-NEXT: br label [[L:%.*]] +; CHECK: L: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) +; CHECK-NEXT: br label [[L]] +; CHECK: if.end: +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: while.cond: +; CHECK-NEXT: br i1 undef, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.body: +; CHECK-NEXT: call void (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @patatino, i32 0, i32 0)) +; CHECK-NEXT: br label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: store i32 [[TMP2]], i32* undef, align 4 +; CHECK-NEXT: ret void +; entry: %d = alloca i32, align 4 store i32 0, i32* null, align 4 diff --git a/llvm/test/Transforms/NewGVN/pr35125.ll b/llvm/test/Transforms/NewGVN/pr35125.ll --- a/llvm/test/Transforms/NewGVN/pr35125.ll +++ b/llvm/test/Transforms/NewGVN/pr35125.ll @@ -29,7 +29,7 @@ ; CHECK-NEXT: store i32 [[TMP1]], i32* @a, align 4 ; CHECK-NEXT: br label [[IF_END6]] ; CHECK: if.end6: -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[LOR_END]] ], [ [[TMP0]], [[IF_END]] ] ; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 [[TMP2]]) ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/NewGVN/refine-stores.ll b/llvm/test/Transforms/NewGVN/refine-stores.ll --- a/llvm/test/Transforms/NewGVN/refine-stores.ll +++ b/llvm/test/Transforms/NewGVN/refine-stores.ll @@ -15,7 +15,7 @@ ; CHECK-LABEL: @spam( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[FOO:%.*]] = bitcast i32* [[A:%.*]] to %struct.eggs** -; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]] +; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]], align 8 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB2:%.*]] @@ -23,8 +23,8 @@ ; CHECK-NEXT: call void @baz() ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* undef -; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]] +; CHECK-NEXT: store i32 0, i32* undef, align 4 +; CHECK-NEXT: store %struct.eggs* null, %struct.eggs** [[FOO]], align 8 ; CHECK-NEXT: unreachable ; bb: @@ -58,11 +58,11 @@ ; CHECK-NEXT: b: ; CHECK-NEXT: br label [[C:%.*]] ; CHECK: c: -; CHECK-NEXT: 
store i64 undef, i64* null +; CHECK-NEXT: store i64 undef, i64* null, align 4 ; CHECK-NEXT: br label [[E:%.*]] ; CHECK: e: -; CHECK-NEXT: [[G:%.*]] = load i64*, i64** null -; CHECK-NEXT: store i64* undef, i64** null +; CHECK-NEXT: [[G:%.*]] = load i64*, i64** null, align 8 +; CHECK-NEXT: store i64* undef, i64** null, align 8 ; CHECK-NEXT: br i1 undef, label [[C]], label [[E]] ; b: @@ -90,16 +90,16 @@ ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP:%.*]] = phi %struct.hoge* [ [[ARG:%.*]], [[BB:%.*]] ], [ null, [[BB1]] ] -; CHECK-NEXT: store %struct.hoge* [[TMP]], %struct.hoge** undef +; CHECK-NEXT: store %struct.hoge* [[TMP]], %struct.hoge** undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP8:%.*]], [[BB7:%.*]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB7]], label [[BB5:%.*]] ; CHECK: bb5: -; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* null +; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* null, align 4 ; CHECK-NEXT: call void @quux() -; CHECK-NEXT: store i64 [[TMP6]], i64* undef +; CHECK-NEXT: store i64 [[TMP6]], i64* undef, align 4 ; CHECK-NEXT: br label [[BB7]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8]] = add i64 [[TMP3]], 1 @@ -137,14 +137,14 @@ define void @b() { ; CHECK-LABEL: @b( -; CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_A:%.*]] +; CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_A:%.*]], align 8 ; CHECK-NEXT: br label [[D:%.*]] ; CHECK: m: ; CHECK-NEXT: unreachable ; CHECK: d: ; CHECK-NEXT: [[G:%.*]] = bitcast %struct.a* [[C]] to i8* ; CHECK-NEXT: [[F:%.*]] = bitcast i8* [[G]] to i32* -; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[F]] +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[F]], align 4 ; CHECK-NEXT: br i1 undef, label [[I:%.*]], label [[J:%.*]] ; CHECK: i: ; CHECK-NEXT: br i1 undef, label [[K:%.*]], label [[M:%.*]] diff --git a/llvm/test/Transforms/NewGVN/rle-nonlocal.ll 
b/llvm/test/Transforms/NewGVN/rle-nonlocal.ll --- a/llvm/test/Transforms/NewGVN/rle-nonlocal.ll +++ b/llvm/test/Transforms/NewGVN/rle-nonlocal.ll @@ -7,14 +7,14 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]] ; CHECK: block2: -; CHECK-NEXT: [[A:%.*]] = load i32*, i32** [[P:%.*]] +; CHECK-NEXT: [[A:%.*]] = load i32*, i32** [[P:%.*]], align 8 ; CHECK-NEXT: br label [[BLOCK4:%.*]] ; CHECK: block3: -; CHECK-NEXT: [[B:%.*]] = load i32*, i32** [[P]] +; CHECK-NEXT: [[B:%.*]] = load i32*, i32** [[P]], align 8 ; CHECK-NEXT: br label [[BLOCK4]] ; CHECK: block4: -; CHECK-NEXT: [[EXISTINGPHI:%.*]] = phi i32* [ [[A]], [[BLOCK2]] ], [ [[B]], [[BLOCK3]] ] -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[EXISTINGPHI]] +; CHECK-NEXT: [[DEAD:%.*]] = phi i32* [ [[A]], [[BLOCK2]] ], [ [[B]], [[BLOCK3]] ] +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[DEAD]], align 4 ; CHECK-NEXT: [[E:%.*]] = add i32 [[C]], [[C]] ; CHECK-NEXT: ret i32 [[E]] ; diff --git a/llvm/test/Transforms/NewGVN/storeoverstore.ll b/llvm/test/Transforms/NewGVN/storeoverstore.ll --- a/llvm/test/Transforms/NewGVN/storeoverstore.ll +++ b/llvm/test/Transforms/NewGVN/storeoverstore.ll @@ -61,13 +61,13 @@ ; CHECK: 5: ; CHECK-NEXT: br label [[TMP6]] ; CHECK: 6: +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i32 [ 10, [[TMP5]] ], [ 15, [[TMP4]] ] ; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP9:%.*]] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP8:%.*]] ; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[DOT0]], 5 -; CHECK-NEXT: br label [[TMP9]] -; CHECK: 9: -; CHECK-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ] +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[DOT1:%.*]] = phi i32 [ [[PHIOFOPS]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ] ; CHECK-NEXT: ret i32 [[DOT1]] ; store i32 5, i32* %0, align 4