Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -45,7 +45,9 @@
 class LoadInst;
 class LoopInfo;
 class MemDepResult;
+class MemoryAccess;
 class MemoryDependenceResults;
+class MemoryLocation;
 class MemorySSA;
 class MemorySSAUpdater;
 class NonLocalDepResult;
@@ -241,6 +243,7 @@
   OptimizationRemarkEmitter *ORE = nullptr;
   ImplicitControlFlowTracking *ICF = nullptr;
   LoopInfo *LI = nullptr;
+  AAResults *AA = nullptr;
   MemorySSAUpdater *MSSAU = nullptr;
 
   ValueTable VN;
@@ -327,21 +330,60 @@
   // List of critical edges to be split between iterations.
   SmallVector<std::pair<Instruction *, unsigned>, 4> toSplit;
 
+  enum class DepKind {
+    Other = 0, // Unknown value.
+    Def,       // Exactly overlapping locations.
+    Clobber,   // Superset of the needed bits, or a possible clobber.
+  };
+
+  struct ReachingMemVal {
+    DepKind Kind;      // Def, Clobber, or Other when nothing is known.
+    BasicBlock *Block; // Block the result is reported for.
+    const Value *Addr; // Address of the load for this result; may be null.
+    Instruction *Inst; // Instruction the value comes from; may be null.
+    int32_t Offset;    // Offset recorded for a Clobber, or -1 when not set.
+    /// Make a DepKind::Other result for \p BB; \p Addr and \p Inst are optional.
+    static ReachingMemVal getUnknown(BasicBlock *BB,
+                                     const Value *Addr = nullptr,
+                                     Instruction *Inst = nullptr) {
+      return {DepKind::Other, BB, Addr, Inst, -1};
+    }
+    /// Make a DepKind::Def result located in the parent block of \p Inst.
+    static ReachingMemVal getDef(const Value *Addr, Instruction *Inst) {
+      return {DepKind::Def, Inst->getParent(), Addr, Inst, -1};
+    }
+    /// Make a DepKind::Clobber result in \p Inst's block, with optional \p Offset.
+    static ReachingMemVal getClobber(const Value *Addr, Instruction *Inst,
+                                     int32_t Offset = -1) {
+      return {DepKind::Clobber, Inst->getParent(), Addr, Inst, Offset};
+    }
+  };
+  /// Look in \p BB for a value reaching \p Loc; None if the search must go on.
+  Optional<ReachingMemVal> findReachingValueForLoadInBlock(
+      const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
+      Instruction *DomLower, Instruction *DomUpper, MemoryAccess *ClobberMA,
+      MemorySSA &MSSA, AAResults &AA);
+  /// Collect into \p Values the values reaching the load \p Inst across blocks.
+  void findReachingValuesForLoad(LoadInst *Inst, MemorySSA &MSSA, AAResults &AA,
+                                 SmallVectorImpl<ReachingMemVal> &Values);
+
   // Helper functions of redundant load elimination
   bool processLoad(LoadInst *L);
   bool processNonLocalLoad(LoadInst *L);
+  bool processNonLocalLoad(LoadInst *L, SmallVectorImpl<ReachingMemVal> &Deps);
   bool processAssumeIntrinsic(AssumeInst *II);
 
   /// Given a local dependency (Def or Clobber) determine if a value is
   /// available for the load.  Returns true if an value is known to be
   /// available and populates Res.  Returns false otherwise.
-  bool AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
-                               Value *Address, gvn::AvailableValue &Res);
+  bool AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep,
+                               gvn::AvailableValue &Res);
 
   /// Given a list of non-local dependencies, determine if a value is
   /// available for the load in each specified block.  If it is, add it to
   /// ValuesPerBlock.  If not, add it to UnavailableBlocks.
-  void AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
+  void AnalyzeLoadAvailability(LoadInst *Load,
+                               SmallVectorImpl<ReachingMemVal> &Deps,
                                AvailValInBlkVect &ValuesPerBlock,
                                UnavailBlkVect &UnavailableBlocks);
 
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1056,7 +1056,7 @@
 
 /// Try to locate the three instruction involved in a missed
 /// load-elimination case that is due to an intervening store.
-static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
+static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst,
                                    DominatorTree *DT,
                                    OptimizationRemarkEmitter *ORE) {
   using namespace ore;
@@ -1111,21 +1111,22 @@
   if (OtherAccess)
     R << " in favor of " << NV("OtherAccess", OtherAccess);
 
-  R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
+  R << " because it is clobbered by " << NV("ClobberedBy", DepInst);
 
   ORE->emit(R);
 }
 
-bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
-                                      Value *Address, AvailableValue &Res) {
-  assert((DepInfo.isDef() || DepInfo.isClobber()) &&
+bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep,
+                                      AvailableValue &Res) {
+  assert((Dep.Kind == DepKind::Def || Dep.Kind == DepKind::Clobber) &&
          "expected a local dependence");
   assert(Load->isUnordered() && "rules below are incorrect for ordered access");
 
   const DataLayout &DL = Load->getModule()->getDataLayout();
 
-  Instruction *DepInst = DepInfo.getInst();
-  if (DepInfo.isClobber()) {
+  Value *Address = const_cast<Value *>(Dep.Addr);
+  Instruction *DepInst = Dep.Inst;
+  if (Dep.Kind == DepKind::Clobber) {
     // If the dependence is to a store that writes to a superset of the bits
     // read by the load, we can extract the bits we need for the load from the
     // stored value.
@@ -1152,16 +1153,20 @@
       if (DepLoad != Load && Address &&
           Load->isAtomic() <= DepLoad->isAtomic()) {
         Type *LoadType = Load->getType();
-        int Offset = -1;
-
-        // If MD reported clobber, check it was nested.
-        if (DepInfo.isClobber() &&
-            canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
-          const auto ClobberOff = MD->getClobberOffset(DepLoad);
-          // GVN has no deal with a negative offset.
-          Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
-                       ? -1
-                       : ClobberOff.getValue();
+        int Offset = Dep.Offset;
+        if (MD && !MSSAU) {
+          // If MD reported clobber, check it was nested.
+          if (canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
+            const auto ClobberOff = MD->getClobberOffset(DepLoad);
+            // GVN has no deal with a negative offset.
+            Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
+                         ? -1
+                         : ClobberOff.getValue();
+          }
+        } else {
+          if (!canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL) ||
+              Offset < 0)
+            Offset = -1;
         }
         if (Offset == -1)
           Offset =
@@ -1191,11 +1196,11 @@
         dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
         dbgs() << " is clobbered by " << *DepInst << '\n';);
     if (ORE->allowExtraAnalysis(DEBUG_TYPE))
-      reportMayClobberedLoad(Load, DepInfo, DT, ORE);
+      reportMayClobberedLoad(Load, DepInst, DT, ORE);
 
     return false;
   }
-  assert(DepInfo.isDef() && "follows from above");
+  assert(Dep.Kind == DepKind::Def && "follows from above");
 
   // Loading the alloca -> undef.
   // Loading immediately after lifetime begin -> undef.
@@ -1250,7 +1255,8 @@
   return false;
 }
 
-void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
+void GVNPass::AnalyzeLoadAvailability(LoadInst *Load,
+                                      SmallVectorImpl<ReachingMemVal> &Deps,
                                       AvailValInBlkVect &ValuesPerBlock,
                                       UnavailBlkVect &UnavailableBlocks) {
   // Filter out useless results (non-locals, etc).  Keep track of the blocks
@@ -1259,17 +1265,16 @@
   // that could potentially clobber the load).
   unsigned NumDeps = Deps.size();
   for (unsigned i = 0, e = NumDeps; i != e; ++i) {
-    BasicBlock *DepBB = Deps[i].getBB();
-    MemDepResult DepInfo = Deps[i].getResult();
-
+    const auto &Dep = Deps[i];
+    BasicBlock *DepBB = Dep.Block;
     if (DeadBlocks.count(DepBB)) {
       // Dead dependent mem-op disguise as a load evaluating the same value
       // as the load in question.
-      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
+      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(Dep.Block));
       continue;
     }
 
-    if (!DepInfo.isDef() && !DepInfo.isClobber()) {
+    if (Dep.Kind != DepKind::Def && Dep.Kind != DepKind::Clobber) {
       UnavailableBlocks.push_back(DepBB);
       continue;
     }
@@ -1277,10 +1282,8 @@
     // The address being loaded in this non-local block may not be the same as
     // the pointer operand of the load if PHI translation occurs.  Make sure
     // to consider the right address.
-    Value *Address = Deps[i].getAddress();
-
     AvailableValue AV;
-    if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
+    if (AnalyzeLoadAvailability(Load, Dep, AV)) {
       // subtlety: because we know this was a non-local dependency, we know
       // it's safe to materialize anywhere between the instruction within
       // DepInfo and the end of it's block.
@@ -1349,7 +1352,8 @@
     // Add the newly created load.
     ValuesPerBlock.push_back(
         AvailableValueInBlock::get(UnavailableBlock, NewLoad));
-    MD->invalidateCachedPointerInfo(LoadPtr);
+    if (MD)
+      MD->invalidateCachedPointerInfo(LoadPtr);
     LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
   }
 
@@ -1360,7 +1364,7 @@
     V->takeName(Load);
   if (Instruction *I = dyn_cast<Instruction>(V))
     I->setDebugLoc(Load->getDebugLoc());
-  if (V->getType()->isPtrOrPtrVectorTy())
+  if (MD && V->getType()->isPtrOrPtrVectorTy())
     MD->invalidateCachedPointerInfo(V);
   markInstructionForDeletion(Load);
   ORE->emit([&]() {
@@ -1711,7 +1715,7 @@
           Attribute::SanitizeHWAddress))
     return false;
 
-  // Step 1: Find the non-local dependencies of the load.
+  // Find the non-local dependencies of the load.
   LoadDepVect Deps;
   MD->getNonLocalPointerDependency(Load, Deps);
 
@@ -1722,10 +1726,27 @@
   if (NumDeps > MaxNumDeps)
     return false;
 
+  SmallVector<ReachingMemVal, 64> MemVals;
+  for (const NonLocalDepResult &Dep : Deps) {
+    Value *Address = Dep.getAddress();
+    BasicBlock *BB = Dep.getBB();
+    Instruction *Inst = Dep.getResult().getInst();
+    if (Dep.getResult().isClobber())
+      MemVals.emplace_back(ReachingMemVal::getClobber(Address, Inst));
+    else if (Dep.getResult().isDef())
+      MemVals.emplace_back(ReachingMemVal::getDef(Address, Inst));
+    else
+      MemVals.emplace_back(ReachingMemVal::getUnknown(BB));
+  }
+  return processNonLocalLoad(Load, MemVals);
+}
+
+bool GVNPass ::processNonLocalLoad(LoadInst *Load,
+                                   SmallVectorImpl<ReachingMemVal> &Deps) {
   // If we had a phi translation failure, we'll have a single entry which is a
   // clobber in the current block.  Reject this early.
-  if (NumDeps == 1 &&
-      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+  if (Deps.size() == 1 && Deps[0].Kind != DepKind::Def &&
+      Deps[0].Kind != DepKind::Clobber) {
     LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
                dbgs() << " has unknown dependencies\n";);
     return false;
@@ -1770,7 +1791,7 @@
       // to propagate Load's DebugLoc because Load may not post-dominate I.
       if (Load->getDebugLoc() && Load->getParent() == I->getParent())
         I->setDebugLoc(Load->getDebugLoc());
-    if (V->getType()->isPtrOrPtrVectorTy())
+    if (MD && V->getType()->isPtrOrPtrVectorTy())
       MD->invalidateCachedPointerInfo(V);
     markInstructionForDeletion(Load);
     ++NumGVNLoad;
@@ -1994,10 +2015,406 @@
   I->replaceAllUsesWith(Repl);
 }
 
+Optional<GVNPass::ReachingMemVal> GVNPass::findReachingValueForLoadInBlock(
+    const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
+    Instruction *DomLower, Instruction *DomUpper, MemoryAccess *ClobberMA,
+    MemorySSA &MSSA, AAResults &AA) {
+  // Returns a Def/Clobber for Loc found in BB, or None to search predecessors.
+  auto updateChoice = [&](ReachingMemVal &Choice, AliasResult &AR,
+                          Instruction *Candidate) {
+    // TODO: Worth choosing between exact or partial overlap ?
+    if (Choice.Kind == DepKind::Other)
+      Choice.Inst = Candidate;
+    else if (MSSA.locallyDominates(MSSA.getMemoryAccess(Choice.Inst),
+                                   MSSA.getMemoryAccess(Candidate)))
+      Choice.Inst = Candidate;
+    else
+      return;
+
+    if (AR == AliasResult::PartialAlias) {
+      Choice.Kind = DepKind::Clobber;
+      Choice.Offset = AR.getOffset();
+    } else {
+      Choice.Kind = DepKind::Def;
+      Choice.Offset = -1;
+    }
+    Choice.Block = Candidate->getParent();
+  };
+
+  // Lower bound is inclusive, upper bound is exclusive.
+  auto isBetweenBounds = [&](const MemoryUseOrDef *U) {
+    if (DomLower == nullptr && DomUpper == nullptr)
+      return true;
+    MemoryAccess *Lower =
+        DomLower == nullptr ? nullptr : MSSA.getMemoryAccess(DomLower);
+    if (Lower != nullptr && !MSSA.locallyDominates(Lower, U))
+      return false;
+    MemoryAccess *Upper =
+        DomUpper == nullptr ? nullptr : MSSA.getMemoryAccess(DomUpper);
+    return Upper == nullptr || (U != Upper && MSSA.locallyDominates(U, Upper));
+  };
+
+  // For all the users of the clobbering access.
+  auto ReachingVal = ReachingMemVal::getUnknown(BB, Loc.Ptr);
+  const auto *MemAccessList = MSSA.getBlockAccesses(BB);
+  if (MemAccessList == nullptr)
+    return None;
+  for (const MemoryAccess &MA : *MemAccessList) {
+    auto *UseOrDef = dyn_cast<MemoryUseOrDef>(&MA);
+    // Invariant loads have `liveOnEntry` as a clobbering access in MemorySSA,
+    // in which case we walk over all memory accesses in the block.
+    if (UseOrDef == nullptr ||
+        (!IsInvariantLoad && UseOrDef->getDefiningAccess() != ClobberMA))
+      continue;
+
+    // We are interested only in loads here, and, in the case of invariant load,
+    // in the stores.
+    Instruction *M = UseOrDef->getMemoryInst();
+    auto *L = dyn_cast<LoadInst>(M);
+    if (!IsInvariantLoad && L == nullptr)
+      continue;
+    auto *S = dyn_cast<StoreInst>(M);
+    // If it's not a load or a store, it cannot give us a useful value for
+    // eliminating the load.
+    if (L == nullptr && S == nullptr)
+      continue;
+
+    // Skip if the use is not within the bounds.
+    if (!isBetweenBounds(UseOrDef))
+      continue;
+
+    AliasResult AR = AA.alias(MemoryLocation::get(M), Loc);
+    // If the locations do not certainly alias, we cannot possibly infer the
+    // following load loads the same value.
+    if (AR == AliasResult::NoAlias || AR == AliasResult::MayAlias)
+      continue;
+
+    // Locations partially overlap, but neither is a subset of the other, or the
+    // second location is before the first.
+    if (AR == AliasResult::PartialAlias &&
+        (!AR.hasOffset() || AR.getOffset() < 0))
+      continue;
+
+    // Locations precisely overlap or the second accesses subset of the bits of
+    // the first.
+    updateChoice(ReachingVal, AR, M);
+  }
+
+  // Found something.
+  if (ReachingVal.Kind != DepKind::Other)
+    return ReachingVal;
+
+  // If the clobbering access is the entry memory state, continue the search
+  // into predecessors, unless the load is from a local object in which case
+  // return the allocation instruction.
+  if (MSSA.isLiveOnEntryDef(ClobberMA)) {
+    auto *Alloc = dyn_cast<AllocaInst>(getUnderlyingObject(Loc.Ptr));
+    if (Alloc != nullptr && Alloc->getParent() == BB)
+      return ReachingMemVal::getDef(Loc.Ptr, const_cast<AllocaInst *>(Alloc));
+
+    return None;
+  }
+
+  // If the clobbering access is a MemoryPhi or in another block, go to
+  // predecessors.
+  if (ClobberMA->getBlock() != BB || isa<MemoryPhi>(ClobberMA))
+    return None;
+
+  Instruction *ClobberInst = cast<MemoryDef>(ClobberMA)->getMemoryInst();
+  auto getOrdering = [](const Instruction *I) {
+    assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+    if (const auto *L = dyn_cast<LoadInst>(I))
+      return L->getOrdering();
+    return cast<StoreInst>(I)->getOrdering();
+  };
+
+  // Check if the clobbering access is a load or a store that we can reuse.
+  if (isa<StoreInst>(ClobberInst) || isa<LoadInst>(ClobberInst)) {
+    AliasResult AR = AA.alias(MemoryLocation::get(ClobberInst), Loc);
+    if (AR == AliasResult::MustAlias)
+      return ReachingMemVal::getDef(Loc.Ptr, ClobberInst);
+
+    if (AR == AliasResult::NoAlias) {
+      // May happen with atomic/volatile load/store or MemorySSA imprecision.
+      if (!ClobberInst->isAtomic() ||
+          !isStrongerThan(getOrdering(ClobberInst), AtomicOrdering::Monotonic))
+        return None;
+      return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst);
+    }
+
+    if (AR == AliasResult::MayAlias ||
+        (AR == AliasResult::PartialAlias &&
+         (!AR.hasOffset() || AR.getOffset() < 0)))
+      return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst);
+
+    // The only option left is an access covering a superset of the required
+    // bits; the guards above rule out negative offsets only, so allow zero.
+    assert(AR == AliasResult::PartialAlias && AR.hasOffset() &&
+           AR.getOffset() >= 0 && "Follows from the conditions above");
+    return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst, AR.getOffset());
+  }
+
+  // If we are at a malloc-like function call, we can turn the load into `undef`
+  // or zero.
+  if (isNoAliasCall(ClobberInst)) {
+    const Value *Obj = getUnderlyingObject(Loc.Ptr);
+    if (Obj == ClobberInst || AA.isMustAlias(ClobberInst, Loc.Ptr))
+      return ReachingMemVal::getDef(Loc.Ptr, ClobberInst);
+  }
+
+  // Can reorder loads across a release fence.
+  if (auto *Fence = dyn_cast<FenceInst>(ClobberInst)) {
+    if (Fence->getOrdering() == AtomicOrdering::Release)
+      return None;
+  }
+
+  // See if the clobber instruction (e.g. a call) may modify the location.
+  ModRefInfo MR = AA.getModRefInfo(ClobberInst, Loc);
+  // If modification is possible, analyse deeper, to exclude accesses to
+  // non-escaping local allocations.
+  if (isModAndRefSet(MR))
+    MR = AA.callCapturesBefore(ClobberInst, Loc, DT);
+  MR = clearMust(MR);
+  if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
+    return None;
+
+  // TODO: Masked load/store?
+
+  // Conservatively return unknown value for the load.
+  return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst);
+}
+
+static Instruction *findInvariantGroupValue(LoadInst *L, DominatorTree &DT) {
+  // Return the most dominating non-volatile invariant.group access whose
+  // pointer equals L's operand modulo bitcasts and all-zero GEPs, or nullptr.
+  // Start from the stripped operand, then walk users through such casts/GEPs.
+  Value *PointerOperand = L->getPointerOperand()->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of
+  // current function.
+  if (isa<Constant>(PointerOperand))
+    return nullptr;
+
+  // Queue to process all pointers that are equivalent to load operand.
+  SmallVector<Value *, 8> PointerUsesQueue;
+  PointerUsesQueue.push_back(PointerOperand);
+
+  Instruction *MostDominatingInstruction = L; // L itself means "none found".
+
+  // FIXME: This loop is O(n^2) because dominates can be O(n) and in worst case
+  // we will see all the instructions.
+  while (!PointerUsesQueue.empty()) {
+    Value *Ptr = PointerUsesQueue.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (User *Us : Ptr->users()) {
+      auto *U = dyn_cast<Instruction>(Us);
+      if (!U || U == L || !DT.dominates(U, MostDominatingInstruction))
+        continue; // Only users dominating the current best are of interest.
+
+      // Add bitcasts and zero GEPs to queue.
+      if (isa<BitCastInst>(U)) {
+        PointerUsesQueue.push_back(U);
+        continue;
+      }
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+        if (GEP->hasAllZeroIndices())
+          PointerUsesQueue.push_back(U);
+        continue;
+      }
+
+      // If we hit a load/store with an invariant.group metadata and the same
+      // pointer operand, we can assume that value pointed to by the pointer
+      // operand didn't change.
+      if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+          getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+        MostDominatingInstruction = U;
+      }
+    }
+  }
+  return MostDominatingInstruction == L ? nullptr : MostDominatingInstruction;
+}
+
+void GVNPass::findReachingValuesForLoad(
+    LoadInst *L, MemorySSA &MSSA, AAResults &AA,
+    SmallVectorImpl<ReachingMemVal> &Values) {
+  // Walk the CFG backwards from the block of L, phi-translating the address,
+  // and append to Values an entry (Def, Clobber or unknown) for each block
+  // where the search stops.
+  struct WorkItem {
+    WorkItem(BasicBlock *BB, MemoryAccess *ClobberMA, const PHITransAddr &Addr,
+             Instruction *DomLower, Instruction *DomUpper)
+        : BB(BB), ClobberMA(ClobberMA), Addr(Addr), DomLower(DomLower),
+          DomUpper(DomUpper) {}
+    BasicBlock *BB;
+    MemoryAccess *ClobberMA;
+    PHITransAddr Addr;
+    Instruction *DomLower;
+    Instruction *DomUpper;
+  };
+  SmallVector<WorkItem, 32> Worklist;
+
+  // Keep the set of visited blocks, together with the pointer they were visited
+  // with. Due to phi-translation, it is possible that we come to a block with a
+  // different pointer in which case we set the block we're coming from (a
+  // successor of the visited block) as clobbering the memory location in an
+  // unknown way.
+  DenseMap<BasicBlock *, Value *> Visited;
+
+  // Factor out the logic deciding whether to continue traversing a predecessor.
+  // Return `Skip` for unreachable blocks and for blocks already visited
+  // with the same (maybe phi-translated) address. Return `FailPred` on
+  // phi-translation failure to an unvisited block. Return `FailBlock` if we
+  // have come to a block already visited with a different address. Otherwise,
+  // return `OK`.
+  enum { FailBlock, FailPred, Skip, OK };
+  auto shouldTraversePredecessor = [&](BasicBlock *Pred, BasicBlock *BB,
+                                       PHITransAddr &Addr) {
+    if (!DT->isReachableFromEntry(Pred))
+      return Skip;
+    if (Addr.NeedsPHITranslationFromBlock(BB))
+      Addr.PHITranslateValue(BB, Pred, DT, false);
+    auto It = Visited.find(Pred);
+    if (It != Visited.end())
+      return It->second == Addr.getAddr() ? Skip : FailBlock;
+    return Addr.getAddr() ? OK : FailPred;
+  };
+
+  const DataLayout &DL = L->getModule()->getDataLayout();
+  auto Loc = MemoryLocation::get(L);
+  bool IsInvariantLoad = L->hasMetadata(LLVMContext::MD_invariant_load);
+  bool HasSanitizer =
+      L->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress) ||
+      L->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeHWAddress);
+
+  // Traverse the CFG backwards from the block containing the load instruction,
+  // looking for instructions, from which we can deduce what value the load
+  // would, well, load. Do a depth-first search with a worklist. Blocks are
+  // marked as visited at the time of adding them to the worklist. That allows
+  // us to record a block as clobbering the memory location whenever we try to
+  // continue the search into a predecessor block for which the phi-translation
+  // fails or yields a different pointer. One exception is the initial block,
+  // which is marked visited not when we start the search (next statement
+  // below), but when we come to it for a second time via a backedge.
+  Worklist.emplace_back(
+      L->getParent(), MSSA.getMemoryAccess(L)->getDefiningAccess(),
+      PHITransAddr(L->getPointerOperand(), DL, AC), nullptr, L);
+  while (!Worklist.empty()) {
+    WorkItem Item = Worklist.pop_back_val();
+
+    assert(((Item.BB == L->getParent() && Item.DomLower == nullptr) ||
+            Visited.count(Item.BB)) &&
+           "All block in the worklist must be marked as visited (except "
+           "the very first block)");
+    assert(
+        Item.Addr.getAddr() != nullptr &&
+        "Blocks with failed phi-translation must not appear on the worklist");
+
+    // If we have found a definite answer (a reusable value or unknown),
+    // continue with the next block in the worklist.
+    if (Optional<ReachingMemVal> R = findReachingValueForLoadInBlock(
+            Loc.getWithNewPtr(Item.Addr.getAddr()), IsInvariantLoad, Item.BB,
+            Item.DomLower, Item.DomUpper, Item.ClobberMA, MSSA, AA)) {
+      if (R->Kind != DepKind::Def &&
+          L->hasMetadata(LLVMContext::MD_invariant_group)) {
+        if (Instruction *G = findInvariantGroupValue(L, *DT))
+          R = ReachingMemVal::getDef(getLoadStorePointerOperand(G), G);
+      }
+      Values.emplace_back(std::move(*R));
+      continue;
+    }
+
+    // Non-local speculations are not allowed under asan. Note that we can exit
+    // from here only on the first iteration of the loop.
+    assert((Item.BB == L->getParent() || !HasSanitizer) &&
+           "Should have exited on the first iteration");
+    if (HasSanitizer) {
+      Values.emplace_back(ReachingMemVal::getUnknown(Item.BB, Loc.Ptr));
+      break;
+    }
+
+    // If the clobbering access is in another block, look in the predecessors,
+    // keeping the same clobbering access. This also handles the case when the
+    // clobbering access is liveOnEntry and we aren't at the entry block.
+    if (Item.ClobberMA->getBlock() != Item.BB) {
+      SmallVector<WorkItem, 4> TmpWorklist;
+      for (BasicBlock *BB : predecessors(Item.BB)) {
+        PHITransAddr Addr = Item.Addr;
+        auto Status = shouldTraversePredecessor(BB, Item.BB, Addr);
+        if (Status == FailBlock) {
+          TmpWorklist.clear();
+          Values.push_back(ReachingMemVal::getUnknown(Item.BB));
+          break;
+        }
+        if (Status == FailPred) {
+          Visited.insert({BB, Addr.getAddr()});
+          Values.push_back(ReachingMemVal::getUnknown(BB));
+          continue;
+        }
+        if (Status == Skip)
+          continue;
+        TmpWorklist.emplace_back(BB, Item.ClobberMA, Addr,
+                                 BB == L->getParent() ? L : nullptr, nullptr);
+      }
+      llvm::for_each(TmpWorklist, [&](auto &WI) {
+        Visited.insert({WI.BB, WI.Addr.getAddr()});
+        Worklist.emplace_back(std::move(WI));
+      });
+      continue;
+    }
+
+    // If the clobbering access is a MemoryPhi, look in the predecessors,
+    // using the corresponding incoming value for this MemoryPhi as the
+    // clobbering access.
+    if (auto *MPhi = dyn_cast<MemoryPhi>(Item.ClobberMA)) {
+      SmallVector<WorkItem, 4> TmpWorklist;
+      for (unsigned I = 0, N = MPhi->getNumIncomingValues(); I < N; ++I) {
+        BasicBlock *BB = MPhi->getIncomingBlock(I);
+        PHITransAddr Addr = Item.Addr;
+        auto Status = shouldTraversePredecessor(BB, Item.BB, Addr);
+        if (Status == FailBlock) {
+          TmpWorklist.clear();
+          Values.push_back(ReachingMemVal::getUnknown(Item.BB));
+          break;
+        }
+        if (Status == FailPred) {
+          Visited.insert({BB, Addr.getAddr()});
+          Values.push_back(ReachingMemVal::getUnknown(BB));
+          continue;
+        }
+        if (Status == Skip)
+          continue;
+        TmpWorklist.emplace_back(BB, MPhi->getIncomingValue(I), Addr,
+                                 BB == L->getParent() ? L : nullptr, nullptr);
+      }
+      llvm::for_each(TmpWorklist, [&](auto &WI) {
+        Visited.insert({WI.BB, WI.Addr.getAddr()});
+        Worklist.emplace_back(std::move(WI));
+      });
+      continue;
+    }
+
+    if (!MSSA.isLiveOnEntryDef(Item.ClobberMA)) {
+      // The clobbering access is a normal instruction, that we can
+      // nevertheless skip over (e.g. a release fence).
+      auto *Def = cast<MemoryUseOrDef>(Item.ClobberMA);
+      Worklist.emplace_back(Item);
+      Worklist.back().ClobberMA = Def->getDefiningAccess();
+      continue;
+    }
+
+    // If we have liveOnEntry and we are at the entry block, then this block
+    // does not provide any useful value for the load.
+    Values.emplace_back(ReachingMemVal::getUnknown(Item.BB, Loc.Ptr));
+  }
+}
+
 /// Attempt to eliminate a load, first by eliminating it
 /// locally, and then attempting non-local elimination if that fails.
 bool GVNPass::processLoad(LoadInst *L) {
-  if (!MD)
+  if (!MD && !MSSAU)
     return false;
 
   // This code hasn't been audited for ordered or volatile memory access
@@ -2009,16 +2426,33 @@
     return true;
   }
 
-  // ... to a pointer that has been loaded from before...
-  MemDepResult Dep = MD->getDependency(L);
+  ReachingMemVal MemVal = ReachingMemVal::getUnknown(nullptr);
+  if (MD && !MSSAU) {
+    // ... to a pointer that has been loaded from before...
+    MemDepResult Dep = MD->getDependency(L);
 
-  // If it is defined in another block, try harder.
-  if (Dep.isNonLocal())
-    return processNonLocalLoad(L);
+    // If it is defined in another block, try harder.
+    if (Dep.isNonLocal())
+      return processNonLocalLoad(L);
 
+    // Only handle the local case below
+    if (Dep.isDef())
+      MemVal = ReachingMemVal::getDef(L->getPointerOperand(), Dep.getInst());
+    else if (Dep.isClobber())
+      MemVal =
+          ReachingMemVal::getClobber(L->getPointerOperand(), Dep.getInst());
+  } else {
+    SmallVector<ReachingMemVal, 8> MemVals;
+    findReachingValuesForLoad(L, *MSSAU->getMemorySSA(), *AA, MemVals);
+    assert(MemVals.size() && "Expected at least an unknown value");
+    if (MemVals.size() > 1 || MemVals[0].Block != L->getParent())
+      return processNonLocalLoad(L, MemVals);
+ 
   // Only handle the local case below
-  if (!Dep.isDef() && !Dep.isClobber()) {
-    // This might be a NonFuncLocal or an Unknown
+    MemVal = MemVals[0];
+  }
+
+  if (MemVal.Kind == DepKind::Other) {
     LLVM_DEBUG(
         // fast print dep, using operator<< on instruction is too slow.
         dbgs() << "GVN: load "; L->printAsOperand(dbgs());
@@ -2027,7 +2461,7 @@
   }
 
   AvailableValue AV;
-  if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
+  if (AnalyzeLoadAvailability(L, MemVal, AV)) {
     Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
 
     // Replace the load!
@@ -2580,6 +3014,7 @@
   DT = &RunDT;
   VN.setDomTree(DT);
   TLI = &RunTLI;
+  AA = &RunAA;
   VN.setAliasAnalysis(&RunAA);
   MD = RunMD;
   ImplicitControlFlowTracking ImplicitCFT;
@@ -3188,8 +3623,6 @@
       return false;
 
     auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
-
-    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
     return Impl.runImpl(
         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
@@ -3200,7 +3633,9 @@
             : nullptr,
         LIWP ? &LIWP->getLoopInfo() : nullptr,
         &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
-        MSSAWP ? &MSSAWP->getMSSA() : nullptr);
+        Impl.isMemorySSAEnabled()
+            ? &getAnalysis<MemorySSAWrapperPass>().getMSSA()
+            : nullptr);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
Index: llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
===================================================================
--- llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -1,4 +1,5 @@
-; RUN: opt -tbaa -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -tbaa -basic-aa -gvn -S -enable-gvn-memoryssa=false < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMDEP
+; RUN: opt -tbaa -basic-aa -gvn -S -enable-gvn-memoryssa=true  < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMSSA
 
 target datalayout = "e-p:64:64:64"
 
@@ -30,14 +31,19 @@
 ; the other type could be unified with the first type, however for now, GVN
 ; should just be conservative.
 
+; However, with MemorySSA enabled (-enable-gvn-memoryssa=true) GVN is no longer
+; conservative here and optimises this function just like the next one.
+
 ; CHECK: @watch_out_for_type_change
 ; CHECK: if.then:
 ; CHECK:   %t = load i32, i32* %p
 ; CHECK:   store i32 %t, i32* %q
 ; CHECK:   ret void
 ; CHECK: if.else:
-; CHECK:   %u = load i32, i32* %p
-; CHECK:   store i32 %u, i32* %q
+; CHECK-MEMDEP-NEXT:   %u = load i32, i32* %p
+; CHECK-MEMDEP-NEXT:   store i32 %u, i32* %q
+; CHECK-MEMSSA-NEXT:   store i32 0, i32* %q
+; CHECK-MEMSSA-NEXT:   ret void
 
 define void @watch_out_for_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
 entry:
Index: llvm/test/Transforms/GVN/PRE/rle.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/rle.ll
+++ llvm/test/Transforms/GVN/PRE/rle.ll
@@ -1,6 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,LE
-; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -enable-gvn-memoryssa=false -S -dce | FileCheck %s --check-prefixes=CHECK,LE,LE-MEMDEP
+; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -enable-gvn-memoryssa=true  -S -dce | FileCheck %s --check-prefixes=CHECK,LE,LE-MEMSSA
+; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basic-aa -gvn -enable-gvn-memoryssa=false -S -dce | FileCheck %s --check-prefixes=CHECK,BE,BE-MEMDEP
+; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basic-aa -gvn -enable-gvn-memoryssa=true  -S -dce | FileCheck %s --check-prefixes=CHECK,BE,BE-MEMSSA
 
 ;; Trivial RLE test.
 define i32 @test0(i32 %V, i32* %P) {
@@ -1014,11 +1015,15 @@
 ; CHECK-NEXT:    [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
 ; CHECK-NEXT:    [[X1:%.*]] = load i32, i32* [[XX]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; LE-NEXT:       [[TMP0:%.*]] = lshr i32 [[X1]], 16
-; BE-NEXT:       [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; LE-MEMDEP-NEXT:       [[TMP0:%.*]] = lshr i32 [[X1]], 16
+; BE-MEMDEP-NEXT:       [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; LE-MEMSSA-NEXT:       [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; BE-MEMSSA-NEXT:       [[TMP0:%.*]] = lshr i32 [[X1]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; LE-NEXT:       [[TMP2:%.*]] = lshr i32 [[X1]], 8
-; BE-NEXT:       [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; LE-MEMDEP-NEXT:       [[TMP2:%.*]] = lshr i32 [[X1]], 8
+; BE-MEMDEP-NEXT:       [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; LE-MEMSSA-NEXT:       [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; BE-MEMSSA-NEXT:       [[TMP2:%.*]] = lshr i32 [[X1]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
@@ -1026,7 +1031,10 @@
 ; CHECK:       else:
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
-; CHECK-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
+; LE-MEMDEP-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
+; BE-MEMDEP-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
+; LE-MEMSSA-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ [[TMP3]], [[ELSE]] ]
+; BE-MEMSSA-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ [[TMP3]], [[ELSE]] ]
 ; CHECK-NEXT:    [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV6]]
 ; CHECK:       if.end:
Index: llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mcpu=corei7 -passes='default<O1>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
-; RUN: opt < %s -mcpu=corei7 -passes='default<O2>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
-; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
-; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
-; RUN: opt < %s -mcpu=corei7 -passes='default<Os>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
-; RUN: opt < %s -mcpu=corei7 -passes='default<Oz>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
-; RUN: opt < %s -mcpu=corei7 -passes='default<O1>,loop-vectorize' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
-; RUN: opt < %s -mcpu=corei7 -passes='default<Oz>,loop-vectorize' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
-; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
+; RUN: opt < %s -mcpu=corei7 -passes='default<O1>' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=O1
+; RUN: opt < %s -mcpu=corei7 -passes='default<O2>' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=O2
+; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -S -unroll-threshold=150 -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=O3DEFAULT
+; RUN: opt < %s -mcpu=corei7 -passes='default<Os>' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=Os
+; RUN: opt < %s -mcpu=corei7 -passes='default<Oz>' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=Oz
+; RUN: opt < %s -mcpu=corei7 -passes='default<O1>,loop-vectorize' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=O1VEC2
+; RUN: opt < %s -mcpu=corei7 -passes='default<Oz>,loop-vectorize' -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=OzVEC2
+; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 -enable-gvn-memoryssa=false | FileCheck %s --check-prefix=O3DIS
 
 ; This file tests the llvm.loop.vectorize.enable metadata forcing
 ; vectorization even when optimization levels are too low, or when