Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -978,22 +978,32 @@
     Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
     BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
 
-  // Do a binary search to see if we already have an entry for this block in
-  // the cache set.  If so, find it.
-  NonLocalDepInfo::iterator Entry = std::upper_bound(
-      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
-  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
-    --Entry;
-
   NonLocalDepEntry *ExistingResult = nullptr;
-  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
-    ExistingResult = &*Entry;
-
-  // If we have a cached entry, and it is non-dirty, use it as the value for
-  // this dependency.
-  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
-    ++NumCacheNonLocalPtr;
-    return ExistingResult->getResult();
+  bool isInvariantLoad = false;
+
+  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
+    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
+
+  // Due to the semantics of invariant loads, many dependencies are ignored,
+  // so the results of dependence analysis cannot be shared with the general
+  // case. Don't use cached results for an invariant load.
+  if (!isInvariantLoad) {
+    // Do a binary search to see if we already have an entry for this block in
+    // the cache set.  If so, find it.
+    NonLocalDepInfo::iterator Entry = std::upper_bound(
+        Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
+    if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
+      --Entry;
+
+    if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
+      ExistingResult = &*Entry;
+
+    // If we have a cached entry, and it is non-dirty, use it as the value for
+    // this dependency.
+    if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+      ++NumCacheNonLocalPtr;
+      return ExistingResult->getResult();
+    }
   }
 
   // Otherwise, we have to scan for the value.  If we have a dirty cache
@@ -1017,12 +1027,15 @@
   MemDepResult Dep =
       getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
 
-  // If we had a dirty entry for the block, update it.  Otherwise, just add
-  // a new entry.
-  if (ExistingResult)
-    ExistingResult->setResult(Dep);
-  else
-    Cache->push_back(NonLocalDepEntry(BB, Dep));
+  // Don't cache the results of dependence analysis for invariant loads.
+  if (!isInvariantLoad) {
+    // If we had a dirty entry for the block, update it.  Otherwise, just add
+    // a new entry.
+    if (ExistingResult)
+      ExistingResult->setResult(Dep);
+    else
+      Cache->push_back(NonLocalDepEntry(BB, Dep));
+  }
 
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
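For reviewers, a minimal IR sketch (illustrative only, not part of the patch; the name @may_write and metadata slot !0 are made up) of why the cache cannot be shared: an ordinary load of %p must treat the preceding call as a potential clobber, while the same load marked !invariant.load is allowed to ignore it, so the two queries must not reuse each other's NonLocalDepEntry results.

declare void @may_write(i8*)

define i8 @sketch(i8* %p) {
entry:
  call void @may_write(i8* %p)                 ; potential clobber of %p
  %plain = load i8, i8* %p                     ; depends on the call above
  %inv = load i8, i8* %p, !invariant.load !0   ; may ignore that dependency
  %sum = add i8 %plain, %inv
  ret i8 %sum
}

!0 = !{}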
Index: test/Analysis/DependenceAnalysis/InvariantLoad.ll
===================================================================
--- /dev/null
+++ test/Analysis/DependenceAnalysis/InvariantLoad.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
+; CHECK: ret i8 0
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.memset.p0i8.i8(i8*, i8, i32, i1)
+declare void @foo(i8*)
+
+define i8 @test(i1 %cmp) {
+entry:
+  %p = alloca i8
+  %addr = getelementptr inbounds i8, i8* %p, i64 0
+  store i8 5, i8* %addr
+  br label %header
+header:
+  %i = phi i8 [0, %entry], [%i.inc, %backedge]
+  br i1 %cmp, label %alive, label %dead
+dead:
+  call void @foo(i8* %p)
+  %v = load i8, i8* %addr, !invariant.load !1
+  %i.1 = add i8 %i, %v
+  br label %alive
+alive:
+  %i.2 = phi i8 [%i, %header], [%i.1, %dead]
+  store i8 -5, i8* %addr
+  br label %backedge
+backedge:
+  call void @llvm.memset.p0i8.i8(i8* align 1 %p, i8 0, i32 1, i1 false)
+  %i.inc = add i8 %i.2, 1
+  %cmp.loop = icmp ugt i8 %i.inc, 100
+  br i1 %cmp.loop, label %exit, label %header
+exit:
+  %res = load i8, i8* %addr
+  ret i8 %res
+}
+
+!1 = !{}
+
Index: test/Analysis/DependenceAnalysis/InvariantLoad2.ll
===================================================================
--- /dev/null
+++ test/Analysis/DependenceAnalysis/InvariantLoad2.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+
+; Check that the first two loads are not optimized out, while the load marked
+; with !invariant.load reuses %res1.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.memset.p0i8.i8(i8*, i8, i32, i1)
+declare void @foo(i8*)
+
+define i8 @test(i1 %cmp, i8 *%p) {
+; CHECK: %res1 = load i8, i8* %p
+; CHECK: %res2 = load i8, i8* %p
+; CHECK: %res.dead = add i8 %res1, %res1
+
+entry:
+  %res1 = load i8, i8* %p
+  call void @foo(i8 *%p)
+  br i1 %cmp, label %b2, label %b1
+b1:
+  %res2 = load i8, i8* %p
+  %res3 = add i8 %res1, %res2
+  br label %alive
+b2:
+  %v = load i8, i8* %p, !invariant.load !1
+  %res.dead = add i8 %v, %res1
+  br label %alive
+alive:
+  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
+  ret i8 %res.phi
+}
+
+; This is essentially the same test case as the one above, but with %b1 and %b2
+; swapped in the "br i1 %cmp, label %b1, label %b2" instruction. This ensures
+; that the results don't depend on the visiting order.
+define i8 @test2(i1 %cmp, i8 *%p) {
+; CHECK: %res1 = load i8, i8* %p
+; CHECK: %res2 = load i8, i8* %p
+; CHECK: %res.dead = add i8 %res1, %res1
+entry:
+  %res1 = load i8, i8* %p
+  call void @foo(i8 *%p)
+  br i1 %cmp, label %b1, label %b2
+b1:
+  %res2 = load i8, i8* %p
+  %res3 = add i8 %res1, %res2
+  br label %alive
+b2:
+  %v = load i8, i8* %p, !invariant.load !1
+  %res.dead = add i8 %v, %res1
+  br label %alive
+alive:
+  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
+  ret i8 %res.phi
+}
+
+!1 = !{}
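Assuming a configured LLVM build tree (the build directory name below is illustrative), both new tests can be run through lit while iterating on the patch:

  ./build/bin/llvm-lit -v test/Analysis/DependenceAnalysis/InvariantLoad.ll \
      test/Analysis/DependenceAnalysis/InvariantLoad2.ll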