Index: include/llvm/Analysis/Utils/Local.h
===================================================================
--- include/llvm/Analysis/Utils/Local.h
+++ include/llvm/Analysis/Utils/Local.h
@@ -417,10 +417,12 @@
 bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr,
                              DeferredDominance *DDT = nullptr);
 
-/// Combine the metadata of two instructions so that K can replace J
+/// Combine the metadata of two instructions so that K can replace J. Some
+/// metadata kinds can only be kept if K dominates J.
 ///
 /// Metadata not listed as known via KnownIDs is removed
-void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs);
+void combineMetadata(Instruction *K, const Instruction *J,
+                     ArrayRef<unsigned> KnownIDs, bool KDominatesJ = false);
 
 /// Combine the metadata of two instructions so that K can replace J. This
 /// specifically handles the case of CSE-like transformations.
@@ -430,7 +432,7 @@
 
 /// Patch the replacement so that it is not more restrictive than the value
 /// being replaced.
-void patchReplacementInstruction(Instruction *I, Value *Repl);
+void patchReplacementInstruction(Instruction *I, Value *Repl, bool KDominatesJ);
 
 // Replace each use of 'From' with 'To', if that use does not belong to basic
 // block where 'From' is defined. Returns the number of replacements made.
Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -1442,8 +1442,9 @@
   return Changed;
 }
 
-static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
-  patchReplacementInstruction(I, Repl);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl,
+                                       bool ReplDominates) {
+  patchReplacementInstruction(I, Repl, ReplDominates);
   I->replaceAllUsesWith(Repl);
 }
 
@@ -1483,8 +1484,14 @@
   if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
     Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
 
+    Instruction *ReplInst = dyn_cast<Instruction>(AvailableValue);
+    (void)ReplInst;
+    assert(
+        (!ReplInst || DT->dominates(ReplInst->getParent(), L->getParent())) &&
+        "patchAndReplaceAllUses requires ReplInst to dominate I");
+
     // Replace the load!
-    patchAndReplaceAllUsesWith(L, AvailableValue);
+    patchAndReplaceAllUsesWith(L, AvailableValue, true);
     markInstructionForDeletion(L);
     ++NumGVNLoad;
     reportLoadElim(L, AvailableValue, ORE);
@@ -1961,8 +1968,13 @@
     return false;
   }
 
+  Instruction *ReplInst = dyn_cast<Instruction>(Repl);
+  (void)ReplInst;
+  assert((!ReplInst || DT->dominates(ReplInst->getParent(), I->getParent())) &&
+         "patchAndReplaceAllUses requires ReplInst to dominate I");
+
   // Remove it!
-  patchAndReplaceAllUsesWith(I, Repl);
+  patchAndReplaceAllUsesWith(I, Repl, true);
   if (MD && Repl->getType()->isPtrOrPtrVectorTy())
     MD->invalidateCachedPointerInfo(Repl);
   markInstructionForDeletion(I);
@@ -2273,7 +2285,7 @@
     if (Value *V = predMap[i].first) {
       // If we use an existing value in this phi, we have to patch the original
      // value because the phi will be used to replace a later value.
-      patchReplacementInstruction(CurInst, V);
+      patchReplacementInstruction(CurInst, V, false);
       Phi->addIncoming(V, predMap[i].second);
     } else
       Phi->addIncoming(PREInstr, PREPred);
Index: lib/Transforms/Scalar/NewGVN.cpp
===================================================================
--- lib/Transforms/Scalar/NewGVN.cpp
+++ lib/Transforms/Scalar/NewGVN.cpp
@@ -3697,7 +3697,7 @@
 }
 
 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
-  patchReplacementInstruction(I, Repl);
+  patchReplacementInstruction(I, Repl, true);
   I->replaceAllUsesWith(Repl);
 }
 
@@ -3735,6 +3735,10 @@
 
 void NewGVN::replaceInstruction(Instruction *I, Value *V) {
   LLVM_DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n");
+  Instruction *ReplInst = dyn_cast<Instruction>(V);
+  (void)ReplInst;
+  assert((!ReplInst || DT->dominates(ReplInst->getParent(), I->getParent())) &&
+         "patchAndReplaceAllUses requires ReplInst to dominate I");
   patchAndReplaceAllUsesWith(I, V);
   // We save the actual erasing to avoid invalidating memory
   // dependencies until we are done with everything.
@@ -4083,8 +4087,15 @@
         // original operand, as we already know we can just drop it.
         auto *ReplacedInst = cast<Instruction>(U->get());
         auto *PI = PredInfo->getPredicateInfoFor(ReplacedInst);
-        if (!PI || DominatingLeader != PI->OriginalOp)
-          patchReplacementInstruction(ReplacedInst, DominatingLeader);
+        if (!PI || DominatingLeader != PI->OriginalOp) {
+          Instruction *ReplInst = dyn_cast<Instruction>(DominatingLeader);
+          (void)ReplInst;
+          assert((!ReplInst || DT->dominates(ReplInst->getParent(),
+                                             ReplacedInst->getParent())) &&
+                 "patchReplacementInstruction requires ReplInst to dominate "
+                 "ReplacedInst");
+          patchReplacementInstruction(ReplacedInst, DominatingLeader, true);
+        }
         U->set(DominatingLeader);
         // This is now a use of the dominating leader, which means if the
         // dominating leader was dead, it's now live!
Index: lib/Transforms/Utils/Local.cpp
===================================================================
--- lib/Transforms/Utils/Local.cpp
+++ lib/Transforms/Utils/Local.cpp
@@ -1998,7 +1998,7 @@
 }
 
 void llvm::combineMetadata(Instruction *K, const Instruction *J,
-                           ArrayRef<unsigned> KnownIDs) {
+                           ArrayRef<unsigned> KnownIDs, bool KDominatesJ) {
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
   K->dropUnknownNonDebugMetadata(KnownIDs);
   K->getAllMetadataOtherThanDebugLoc(Metadata);
@@ -2034,8 +2034,9 @@
         K->setMetadata(Kind, JMD);
         break;
       case LLVMContext::MD_nonnull:
-        // Only set the !nonnull if it is present in both instructions.
-        K->setMetadata(Kind, JMD);
+        // If K does not dominate J, both must have !nonnull.
+        if (!KDominatesJ)
+          K->setMetadata(Kind, JMD);
         break;
       case LLVMContext::MD_invariant_group:
         // Preserve !invariant.group in K.
@@ -2073,7 +2074,8 @@
   combineMetadata(K, J, KnownIDs);
 }
 
-void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
+void llvm::patchReplacementInstruction(Instruction *I, Value *Repl,
+                                       bool ReplDominatesI) {
   auto *ReplInst = dyn_cast<Instruction>(Repl);
   if (!ReplInst)
     return;
@@ -2093,15 +2095,13 @@
   // noalias scopes here and do better than the general conservative
   // answer used in combineMetadata().
 
-  // In general, GVN unifies expressions over different control-flow
-  // regions, and so we need a conservative combination of the noalias
-  // scopes.
-  static const unsigned KnownIDs[] = {
-      LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
-      LLVMContext::MD_noalias,        LLVMContext::MD_range,
-      LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
-      LLVMContext::MD_invariant_group};
-  combineMetadata(ReplInst, I, KnownIDs);
+
+  static const unsigned KnownIDs[] = {
+      LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
+      LLVMContext::MD_noalias,         LLVMContext::MD_range,
+      LLVMContext::MD_fpmath,          LLVMContext::MD_invariant_load,
+      LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull};
+  combineMetadata(ReplInst, I, KnownIDs, ReplDominatesI);
 }
 
 template <typename RootType, typename DominatesFn>
Index: test/Transforms/NewGVN/metadata-nonnull.ll
===================================================================
--- /dev/null
+++ test/Transforms/NewGVN/metadata-nonnull.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+
+; RUN: opt %s -newgvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8* @test1(i8** %v0, i8** %v1) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[V2:%.*]] = load i8*, i8** [[V0:%.*]], !nonnull !0
+; CHECK-NEXT:    store i8* [[V2]], i8** [[V1:%.*]]
+; CHECK-NEXT:    ret i8* [[V2]]
+;
+top:
+  %v2 = load i8*, i8** %v0, !nonnull !0
+  store i8* %v2, i8** %v1
+  %v3 = load i8*, i8** %v1
+  ret i8* %v3
+}
+
+; FIXME: could propagate nonnull to first load?
+define i8* @test2(i8** %v0, i8** %v1) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[V2:%.*]] = load i8*, i8** [[V0:%.*]]
+; CHECK-NEXT:    store i8* [[V2]], i8** [[V1:%.*]]
+; CHECK-NEXT:    ret i8* [[V2]]
+;
+top:
+  %v2 = load i8*, i8** %v0
+  store i8* %v2, i8** %v1
+  %v3 = load i8*, i8** %v1, !nonnull !0
+  ret i8* %v3
+}
+
+declare void @use1(i8* %a) readonly
+
+define i8* @test3(i8** %v0) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[V1:%.*]] = load i8*, i8** [[V0:%.*]]
+; CHECK-NEXT:    call void @use1(i8* [[V1]])
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    ret i8* [[V1]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i8* [[V1]]
+;
+top:
+  %v1 = load i8*, i8** %v0
+  call void @use1(i8* %v1)
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %v2 = load i8*, i8** %v0, !nonnull !0
+  ret i8* %v2
+
+bb2:
+  %v3 = load i8*, i8** %v0
+  ret i8* %v3
+}
+
+define i8* @test4(i8** %v0) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[V1:%.*]] = load i8*, i8** [[V0:%.*]]
+; CHECK-NEXT:    call void @use1(i8* [[V1]])
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    ret i8* [[V1]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i8* [[V1]]
+;
+top:
+  %v1 = load i8*, i8** %v0
+  call void @use1(i8* %v1)
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %v2 = load i8*, i8** %v0
+  ret i8* %v2
+
+bb2:
+  %v3 = load i8*, i8** %v0, !nonnull !0
+  ret i8* %v3
+}
+
+define i8* @test5(i8** %v0) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[V1:%.*]] = load i8*, i8** [[V0:%.*]], !nonnull !0
+; CHECK-NEXT:    call void @use1(i8* [[V1]])
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    ret i8* [[V1]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i8* [[V1]]
+;
+top:
+  %v1 = load i8*, i8** %v0, !nonnull !0
+  call void @use1(i8* %v1)
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %v2 = load i8*, i8** %v0
+  ret i8* %v2
+
+bb2:
+  %v3 = load i8*, i8** %v0
+  ret i8* %v3
+}
+
+define i8* @test6(i8** %v0, i8** %v1) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[V2:%.*]] = load i8*, i8** [[V0:%.*]], !nonnull !0
+; CHECK-NEXT:    store i8* [[V2]], i8** [[V1:%.*]]
+; CHECK-NEXT:    ret i8* [[V2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[V4:%.*]] = load i8*, i8** [[V0]]
+; CHECK-NEXT:    store i8* [[V4]], i8** [[V1]]
+; CHECK-NEXT:    ret i8* [[V4]]
+;
+top:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %v2 = load i8*, i8** %v0, !nonnull !0
+  store i8* %v2, i8** %v1
+  %v3 = load i8*, i8** %v1
+  ret i8* %v3
+
+bb2:
+  %v4 = load i8*, i8** %v0
+  store i8* %v4, i8** %v1
+  %v5 = load i8*, i8** %v1, !nonnull !0
+  ret i8* %v5
+}
+
+declare void @use2(i8* %a)
+
+define i8* @test7(i8** %v0) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[V1:%.*]] = load i8*, i8** [[V0:%.*]], !nonnull !0
+; CHECK-NEXT:    call void @use2(i8* [[V1]])
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[V2:%.*]] = load i8*, i8** [[V0]]
+; CHECK-NEXT:    ret i8* [[V2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[V3:%.*]] = load i8*, i8** [[V0]]
+; CHECK-NEXT:    ret i8* [[V3]]
+;
+top:
+  %v1 = load i8*, i8** %v0, !nonnull !0
+  call void @use2(i8* %v1)
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %v2 = load i8*, i8** %v0
+  ret i8* %v2
+
+bb2:
+  %v3 = load i8*, i8** %v0
+  ret i8* %v3
+}
+
+!0 = !{}
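
Usage sketch (illustrative only, not part of the patch): the snippet below shows how a transform that holds a DominatorTree would be expected to drive the new parameter added above. The helper name replaceAndPatch and its surrounding setup are hypothetical; the GVN/NewGVN call sites in this patch pass true directly because they assert dominance beforehand, while passing false keeps the old conservative handling of !nonnull.

  // Illustrative sketch, not part of the patch. Assumes Repl computes the
  // same value as I and that the caller has an up-to-date DominatorTree.
  #include "llvm/Analysis/Utils/Local.h" // patchReplacementInstruction
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static void replaceAndPatch(Instruction *I, Value *Repl, DominatorTree &DT) {
    // Claim dominance only when Repl is an instruction whose block dominates
    // I's block, mirroring the block-level checks asserted in GVN/NewGVN.
    bool ReplDominatesI = false;
    if (auto *ReplInst = dyn_cast<Instruction>(Repl))
      ReplDominatesI = DT.dominates(ReplInst->getParent(), I->getParent());

    // With ReplDominatesI == true, Repl may keep its !nonnull even if I lacks
    // it; with false, !nonnull survives only if both instructions carry it.
    patchReplacementInstruction(I, Repl, ReplDominatesI);
    I->replaceAllUsesWith(Repl);
  }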