Index: llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -26,6 +26,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PredIteratorCache.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
@@ -314,7 +315,10 @@
   /// Cache storing single nonlocal def for the instruction.
   /// It is set when nonlocal def would be found in function returning only
   /// local dependencies.
-  DenseMap<Instruction *, NonLocalDepResult> NonLocalDefsCache;
+  DenseMap<AssertingVH<const Value>, NonLocalDepResult> NonLocalDefsCache;
+  using ReverseNonLocalDefsCacheTy =
+    DenseMap<Instruction *, SmallPtrSet<const Value*, 4>>;
+  ReverseNonLocalDefsCacheTy ReverseNonLocalDefsCache;
 
   /// This map stores the cached results of doing a pointer lookup at the
   /// bottom of a block.
Index: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -433,6 +433,7 @@
   NonLocalDefsCache.try_emplace(
       LI, NonLocalDepResult(ClosestDependency->getParent(),
                             MemDepResult::getDef(ClosestDependency), nullptr));
+  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
   return MemDepResult::getNonLocal();
 }
 
@@ -919,12 +920,12 @@
          "Can't get pointer deps of a non-pointer!");
   Result.clear();
   {
-    // Check if there is cached Def with invariant.group. FIXME: cache might be
-    // invalid if cached instruction would be removed between call to
-    // getPointerDependencyFrom and this function.
+    // Check if there is cached Def with invariant.group.
     auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
     if (NonLocalDefIt != NonLocalDefsCache.end()) {
-      Result.push_back(std::move(NonLocalDefIt->second));
+      Result.push_back(NonLocalDefIt->second);
+      ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
+          .erase(QueryInst);
       NonLocalDefsCache.erase(NonLocalDefIt);
       return;
     }
@@ -1459,9 +1460,29 @@
   return true;
 }
 
-/// If P exists in CachedNonLocalPointerInfo, remove it.
+/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
     ValueIsLoadPair P) {
+
+  // Most of the time this cache is empty.
+  if (!NonLocalDefsCache.empty()) {
+    auto it = NonLocalDefsCache.find(P.getPointer());
+    if (it != NonLocalDefsCache.end()) {
+      RemoveFromReverseMap(ReverseNonLocalDefsCache,
+                           it->second.getResult().getInst(), P.getPointer());
+      NonLocalDefsCache.erase(it);
+    }
+
+    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
+      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
+      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
+        for (const auto &entry : toRemoveIt->second)
+          NonLocalDefsCache.erase(entry);
+        ReverseNonLocalDefsCache.erase(toRemoveIt);
+      }
+    }
+  }
+
   CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
   if (It == NonLocalPointerDeps.end())
     return;
Index: llvm/trunk/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll
===================================================================
--- llvm/trunk/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll
+++ llvm/trunk/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; Memdep had a funny bug related to invariant.groups - because it did not
+; invalidate the cache, in some very rare cases it was possible to show a
+; memory dependence on an instruction that was deleted, but because another
+; instruction took its place, it resulted in a call to the vtable! Removing any
+; of the branches hides the bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%0 = type { i32 (...)**, %1 }
+%1 = type { %2 }
+%2 = type { %3 }
+%3 = type { %4, i64, %5 }
+%4 = type { i8* }
+%5 = type { i64, [8 x i8] }
+
+define void @fail(i1* noalias sret, %0*, %1*, i8*) local_unnamed_addr #0 {
+; CHECK-LABEL: @fail(
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %0* [[TMP1:%.*]] to i64 (%0*)***
+; CHECK-NEXT:    [[TMP6:%.*]] = load i64 (%0*)**, i64 (%0*)*** [[TMP5]], align 8, !invariant.group !6
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64 (%0*)*, i64 (%0*)** [[TMP6]], i64 6
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64 (%0*)*, i64 (%0*)** [[TMP7]], align 8, !invariant.load !6
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call i64 [[TMP8]](%0* [[TMP1]]) #1
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[TMP1]], %1* [[TMP2:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8
+; CHECK-NEXT:    store i8 0, i8* [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64 (%0*)** [[TMP6]] to i64 (%0*, i8*, i64)**
+; CHECK-NEXT:    br i1 undef
+; CHECK:         [[TMP14:%.*]] = bitcast %0* [[TMP1]] to i64 (%0*, i8*, i64)***
+; CHECK-NEXT:    [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds i64 (%0*, i8*, i64)*, i64 (%0*, i8*, i64)** [[TMP12]], i64 22
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i64 (%0*, i8*, i64)*, i64 (%0*, i8*, i64)** [[DOTPHI_TRANS_INSERT]], align 8, !invariant.load !6
+; CHECK-NEXT:    br label [[TMP15:%.*]]
+; CHECK:         [[TMP16:%.*]] = call i64 [[DOTPRE]](%0* nonnull [[TMP1]], i8* null, i64 0) #1
+
+  %5 = bitcast %0* %1 to i64 (%0*)***
+  %6 = load i64 (%0*)**, i64 (%0*)*** %5, align 8, !invariant.group !6
+  %7 = getelementptr inbounds i64 (%0*)*, i64 (%0*)** %6, i64 6
+  %8 = load i64 (%0*)*, i64 (%0*)** %7, align 8, !invariant.load !6
+  %9 = tail call i64 %8(%0* %1) #1
+  %10 = getelementptr inbounds %1, %1* %2, i64 0, i32 0, i32 0, i32 0, i32 0
+  %11 = load i8*, i8** %10, align 8
+  store i8 0, i8* %11, align 1
+  br i1 undef, label %12, label %31
+
+; <label>:12:                                     ; preds = %4
+  %13 = bitcast %0* %1 to i64 (%0*, i8*, i64)***
+  br label %14
+
+; <label>:14:                                     ; preds = %30, %12
+  %15 = load i64 (%0*, i8*, i64)**, i64 (%0*, i8*, i64)*** %13, align 8, !invariant.group !6
+  %16 = getelementptr inbounds i64 (%0*, i8*, i64)*, i64 (%0*, i8*, i64)** %15, i64 22
+  %17 = load i64 (%0*, i8*, i64)*, i64 (%0*, i8*, i64)** %16, align 8, !invariant.load !6
+  %18 = call i64 %17(%0* nonnull %1, i8* null, i64 0) #1
+  br i1 undef, label %30, label %19
+
+; <label>:19:                                     ; preds = %14
+  br i1 undef, label %20, label %23
+
+; <label>:20:                                     ; preds = %19
+  br label %21
+
+; <label>:21:                                     ; preds = %20
+  br label %22
+
+; <label>:22:                                     ; preds = %21
+  br label %30
+
+; <label>:23:                                     ; preds = %19
+  br label %24
+
+; <label>:24:                                     ; preds = %23
+  br label %25
+
+; <label>:25:                                     ; preds = %24
+  br label %26
+
+; <label>:26:                                     ; preds = %25
+  br i1 undef, label %27, label %28
+
+; <label>:27:                                     ; preds = %26
+  br label %30
+
+; <label>:28:                                     ; preds = %26
+  br label %29
+
+; <label>:29:                                     ; preds = %28
+  br label %30
+
+; <label>:30:                                     ; preds = %29, %27, %22, %14
+  br i1 undef, label %14, label %31
+
+; <label>:31:                                     ; preds = %30, %4
+  ret void
+}
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.linker.options = !{}
+!llvm.module.flags = !{!0, !1, !3, !4, !5}
+
+!0 = !{i32 1, !"StrictVTablePointers", i32 1}
+!1 = !{i32 3, !"StrictVTablePointersRequirement", !2}
+!2 = !{!"StrictVTablePointers", i32 1}
+!3 = !{i32 1, !"wchar_size", i32 4}
+!4 = !{i32 7, !"PIC Level", i32 2}
+!5 = !{i32 7, !"PIE Level", i32 2}
+!6 = !{}