Index: llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h =================================================================== --- llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -428,13 +428,15 @@ MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB); - bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, + bool getNonLocalPointerDepFromBB(Instruction *QueryInst, + const PHITransAddr &Pointer, const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock = false); - MemDepResult GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, + MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, + const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries); Index: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp +++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -924,7 +924,7 @@ // a block with multiple different pointers. This can happen during PHI // translation. DenseMap<BasicBlock*, Value*> Visited; - if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, + if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; Result.clear(); @@ -938,7 +938,8 @@ /// lookup (which may use dirty cache info if available). If we do a lookup, /// add the result to the cache. 
MemDepResult MemoryDependenceAnalysis:: -GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, +GetNonLocalInfoForBlock(Instruction *QueryInst, + const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { @@ -979,7 +980,8 @@ } // Scan the block for the dependency. - MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); + MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, + QueryInst); // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. @@ -1052,7 +1054,8 @@ /// not compute dependence information for some reason. This should be treated /// as a clobber dependence on the first instruction in the predecessor block. bool MemoryDependenceAnalysis:: -getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, +getNonLocalPointerDepFromBB(Instruction *QueryInst, + const PHITransAddr &Pointer, const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepResult> &Result, @@ -1091,7 +1094,7 @@ } else if (CacheInfo->Size > Loc.Size) { // This query's Size is less than the cached one. Conservatively restart // the query using the greater size. - return getNonLocalPointerDepFromBB(Pointer, + return getNonLocalPointerDepFromBB(QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, StartBB, Result, Visited, SkipFirstBlock); @@ -1111,7 +1114,8 @@ CacheInfo->NonLocalDeps.clear(); } if (Loc.AATags) - return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(), + return getNonLocalPointerDepFromBB(QueryInst, + Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, Visited, SkipFirstBlock); } @@ -1214,7 +1218,8 @@ // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. 
DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, + MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, + Loc, isLoad, BB, Cache, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. @@ -1348,7 +1353,7 @@ // result conflicted with the Visited list; we have to conservatively // assume it is unknown, but this also does not block PRE of the load. if (!CanTranslate || - getNonLocalPointerDepFromBB(PredPointer, + getNonLocalPointerDepFromBB(QueryInst, PredPointer, Loc.getWithNewPtr(PredPtrVal), isLoad, Pred, Result, Visited)) { Index: llvm/trunk/test/Transforms/GVN/invariant-load.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/invariant-load.ll +++ llvm/trunk/test/Transforms/GVN/invariant-load.ll @@ -27,5 +27,43 @@ ret i32 %add } +; With the invariant.load metadata, what would otherwise +; be a case for PRE becomes a full redundancy. +define i32 @test3(i1 %cnd, i32* %p, i32* %q) { +; CHECK-LABEL: test3 +; CHECK-NOT: load +entry: + %v1 = load i32* %p + br i1 %cnd, label %bb1, label %bb2 + +bb1: + store i32 5, i32* %q + br label %bb2 + +bb2: + %v2 = load i32* %p, !invariant.load !0 + %res = sub i32 %v1, %v2 + ret i32 %res +} + +; This test is here to document a case which doesn't optimize +; as well as it could. 
+define i32 @test4(i1 %cnd, i32* %p, i32* %q) { +; CHECK-LABEL: test4 +; %v2 is redundant, but GVN currently doesn't catch that +entry: + %v1 = load i32* %p, !invariant.load !0 + br i1 %cnd, label %bb1, label %bb2 + +bb1: + store i32 5, i32* %q + br label %bb2 + +bb2: + %v2 = load i32* %p + %res = sub i32 %v1, %v2 + ret i32 %res +} + !0 = !{ } Index: llvm/trunk/test/Transforms/GVN/tbaa.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/tbaa.ll +++ llvm/trunk/test/Transforms/GVN/tbaa.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -gvn -S < %s | FileCheck %s +; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s define i32 @test1(i8* %p, i8* %q) { ; CHECK: @test1(i8* %p, i8* %q) @@ -72,6 +72,37 @@ ret i32 %c } + + +define i32 @test8(i32* %p, i32* %q) { +; CHECK-LABEL: test8 +; CHECK-NEXT: store i32 15, i32* %p +; CHECK-NEXT: ret i32 0 +; Since we know the location is invariant, we can forward the +; load across the potentially aliasing store. + + %a = load i32* %q, !tbaa !10 + store i32 15, i32* %p + %b = load i32* %q, !tbaa !10 + %c = sub i32 %a, %b + ret i32 %c +} +define i32 @test9(i32* %p, i32* %q) { +; CHECK-LABEL: test9 +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: ret i32 0 +; Since we know the location is invariant, we can forward the +; load across the potentially aliasing store (within the call). + + %a = load i32* %q, !tbaa !10 + call void @clobber() + %b = load i32* %q, !tbaa !10 + %c = sub i32 %a, %b + ret i32 %c +} + + +declare void @clobber() declare i32 @foo(i8*) readonly ; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0} @@ -89,3 +120,9 @@ !6 = !{!"A", !2} !7 = !{!"B", !6} !8 = !{!"another root", null} + + +;; A TBAA structure whose only point is to have a constant location !9 = !{!"yet another root"} !10 = !{!"node", !9, i64 1} +