Index: llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -428,13 +428,15 @@
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,
                                            BasicBlock *BB);
-    bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+    bool getNonLocalPointerDepFromBB(Instruction *QueryInst,
+                                     const PHITransAddr &Pointer,
                                      const AliasAnalysis::Location &Loc,
                                      bool isLoad, BasicBlock *BB,
                                      SmallVectorImpl<NonLocalDepResult> &Result,
                                      DenseMap<BasicBlock*, Value*> &Visited,
                                      bool SkipFirstBlock = false);
-    MemDepResult GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+    MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst,
+                                         const AliasAnalysis::Location &Loc,
                                          bool isLoad, BasicBlock *BB,
                                          NonLocalDepInfo *Cache,
                                          unsigned NumSortedEntries);
Index: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -924,7 +924,7 @@
   // a block with multiple different pointers.  This can happen during PHI
   // translation.
   DenseMap<BasicBlock*, Value*> Visited;
-  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+  if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                    Result, Visited, true))
     return;
   Result.clear();
@@ -938,7 +938,8 @@
 /// lookup (which may use dirty cache info if available).  If we do a lookup,
 /// add the result to the cache.
 MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+GetNonLocalInfoForBlock(Instruction *QueryInst,
+                        const AliasAnalysis::Location &Loc,
                         bool isLoad, BasicBlock *BB,
                         NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
 
@@ -979,7 +980,8 @@
   }
 
   // Scan the block for the dependency.
-  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
+                                              QueryInst);
 
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
@@ -1052,7 +1054,8 @@
 /// not compute dependence information for some reason.  This should be treated
 /// as a clobber dependence on the first instruction in the predecessor block.
 bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+getNonLocalPointerDepFromBB(Instruction *QueryInst,
+                            const PHITransAddr &Pointer,
                             const AliasAnalysis::Location &Loc,
                             bool isLoad, BasicBlock *StartBB,
                             SmallVectorImpl<NonLocalDepResult> &Result,
@@ -1091,7 +1094,7 @@
     } else if (CacheInfo->Size > Loc.Size) {
       // This query's Size is less than the cached one. Conservatively restart
       // the query using the greater size.
-      return getNonLocalPointerDepFromBB(Pointer,
+      return getNonLocalPointerDepFromBB(QueryInst, Pointer,
                                          Loc.getWithNewSize(CacheInfo->Size),
                                          isLoad, StartBB, Result, Visited,
                                          SkipFirstBlock);
@@ -1111,7 +1114,8 @@
         CacheInfo->NonLocalDeps.clear();
       }
       if (Loc.AATags)
-        return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
+        return getNonLocalPointerDepFromBB(QueryInst,
+                                           Pointer, Loc.getWithoutAATags(),
                                            isLoad, StartBB, Result, Visited,
                                            SkipFirstBlock);
     }
@@ -1214,7 +1218,8 @@
       // Get the dependency info for Pointer in BB.  If we have cached
       // information, we will use it, otherwise we compute it.
       DEBUG(AssertSorted(*Cache, NumSortedEntries));
-      MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+      MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
+                                                 Loc, isLoad, BB, Cache,
                                                  NumSortedEntries);
 
       // If we got a Def or Clobber, add this to the list of results.
@@ -1348,7 +1353,7 @@
       // result conflicted with the Visited list; we have to conservatively
       // assume it is unknown, but this also does not block PRE of the load.
       if (!CanTranslate ||
-          getNonLocalPointerDepFromBB(PredPointer,
+          getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal),
                                       isLoad, Pred,
                                       Result, Visited)) {
Index: llvm/trunk/test/Transforms/GVN/invariant-load.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/invariant-load.ll
+++ llvm/trunk/test/Transforms/GVN/invariant-load.ll
@@ -27,5 +27,43 @@
   ret i32 %add
 }
 
+; With the invariant.load metadata, what would otherwise
+; be a case for PRE becomes a full redundancy.
+define i32 @test3(i1 %cnd, i32* %p, i32* %q) {
+; CHECK-LABEL: test3
+; CHECK-NOT: load
+entry:
+  %v1 = load i32* %p
+  br i1 %cnd, label %bb1, label %bb2
+
+bb1:
+  store i32 5, i32* %q
+  br label %bb2
+
+bb2:
+  %v2 = load i32* %p, !invariant.load !0
+  %res = sub i32 %v1, %v2
+  ret i32 %res
+}
+
+; This test documents a case which doesn't optimize
+; as well as it could.
+define i32 @test4(i1 %cnd, i32* %p, i32* %q) {
+; CHECK-LABEL: test4
+; %v2 is redundant, but GVN currently doesn't catch that
+entry:
+  %v1 = load i32* %p, !invariant.load !0
+  br i1 %cnd, label %bb1, label %bb2
+
+bb1:
+  store i32 5, i32* %q
+  br label %bb2
+
+bb2:
+  %v2 = load i32* %p
+  %res = sub i32 %v1, %v2
+  ret i32 %res
+}
+
 !0 = !{ }
 
Index: llvm/trunk/test/Transforms/GVN/tbaa.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/tbaa.ll
+++ llvm/trunk/test/Transforms/GVN/tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i8* %p, i8* %q) {
 ; CHECK: @test1(i8* %p, i8* %q)
@@ -72,6 +72,37 @@
   ret i32 %c
 }
 
+
+
+define i32 @test8(i32* %p, i32* %q) {
+; CHECK-LABEL: test8
+; CHECK-NEXT: store i32 15, i32* %p
+; CHECK-NEXT: ret i32 0
+; Since the TBAA tag marks the location as constant, we can forward
+; the load across the potentially aliasing store.
+
+  %a = load i32* %q, !tbaa !10
+  store i32 15, i32* %p
+  %b = load i32* %q, !tbaa !10
+  %c = sub i32 %a, %b
+  ret i32 %c
+}
+define i32 @test9(i32* %p, i32* %q) {
+; CHECK-LABEL: test9
+; CHECK-NEXT: call void @clobber()
+; CHECK-NEXT: ret i32 0
+; Since the TBAA tag marks the location as constant, we can forward
+; the load across the potentially aliasing store (within the call).
+
+  %a = load i32* %q, !tbaa !10
+  call void @clobber()
+  %b = load i32* %q, !tbaa !10
+  %c = sub i32 %a, %b
+  ret i32 %c
+}
+
+
+declare void @clobber()
 declare i32 @foo(i8*) readonly
 
 ; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
@@ -89,3 +120,9 @@
 !6 = !{!"A", !2}
 !7 = !{!"B", !6}
 !8 = !{!"another root", null}
+
+
+;; A TBAA structure whose only point is to have a constant location
+!9 = !{!"yet another root"}
+!10 = !{!"node", !9, i64 1}
+