Index: llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h =================================================================== --- llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ llvm/trunk/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -26,6 +26,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include @@ -314,7 +315,10 @@ /// Cache storing single nonlocal def for the instruction. /// It is set when nonlocal def would be found in function returning only /// local dependencies. - DenseMap<Instruction *, NonLocalDepResult> NonLocalDefsCache; + DenseMap<AssertingVH<const Value>, NonLocalDepResult> NonLocalDefsCache; + using ReverseNonLocalDefsCacheTy = + DenseMap<Instruction *, SmallPtrSet<const Value*, 1>>; + ReverseNonLocalDefsCacheTy ReverseNonLocalDefsCache; /// This map stores the cached results of doing a pointer lookup at the /// bottom of a block. Index: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp +++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -433,6 +433,7 @@ NonLocalDefsCache.try_emplace( LI, NonLocalDepResult(ClosestDependency->getParent(), MemDepResult::getDef(ClosestDependency), nullptr)); + ReverseNonLocalDefsCache[ClosestDependency].insert(LI); return MemDepResult::getNonLocal(); } @@ -919,12 +920,12 @@ "Can't get pointer deps of a non-pointer!"); Result.clear(); { - // Check if there is cached Def with invariant.group. FIXME: cache might be - // invalid if cached instruction would be removed between call to - // getPointerDependencyFrom and this function. + // Check if there is cached Def with invariant.group. 
auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst); if (NonLocalDefIt != NonLocalDefsCache.end()) { - Result.push_back(std::move(NonLocalDefIt->second)); + Result.push_back(NonLocalDefIt->second); + ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()] + .erase(QueryInst); NonLocalDefsCache.erase(NonLocalDefIt); return; } @@ -1459,9 +1460,29 @@ return true; } -/// If P exists in CachedNonLocalPointerInfo, remove it. +/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it. void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies( ValueIsLoadPair P) { + + // Most of the time this cache is empty. + if (!NonLocalDefsCache.empty()) { + auto it = NonLocalDefsCache.find(P.getPointer()); + if (it != NonLocalDefsCache.end()) { + RemoveFromReverseMap(ReverseNonLocalDefsCache, + it->second.getResult().getInst(), P.getPointer()); + NonLocalDefsCache.erase(it); + } + + if (auto *I = dyn_cast<Instruction>(P.getPointer())) { + auto toRemoveIt = ReverseNonLocalDefsCache.find(I); + if (toRemoveIt != ReverseNonLocalDefsCache.end()) { + for (const auto &entry : toRemoveIt->second) + NonLocalDefsCache.erase(entry); + ReverseNonLocalDefsCache.erase(toRemoveIt); + } + } + } + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; Index: llvm/trunk/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll =================================================================== --- llvm/trunk/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll +++ llvm/trunk/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S < %s | FileCheck %s + +; Memdep had a funny bug related to invariant.groups - because it did not +; invalidate the cache, in some very rare cases it was possible to show memory +; dependence of the instruction that was deleted, but because another instruction 
+; took its place, it resulted in a call to vtable! Removing any of the branches +; hides the bug. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%0 = type { i32 (...)**, %1 } +%1 = type { %2 } +%2 = type { %3 } +%3 = type { %4, i64, %5 } +%4 = type { i8* } +%5 = type { i64, [8 x i8] } + +define void @fail(i1* noalias sret, %0*, %1*, i8*) local_unnamed_addr #0 { +; CHECK-LABEL: @fail( +; CHECK-NEXT: [[TMP5:%.*]] = bitcast %0* [[TMP1:%.*]] to i64 (%0*)*** +; CHECK-NEXT: [[TMP6:%.*]] = load i64 (%0*)**, i64 (%0*)*** [[TMP5]], align 8, !invariant.group !6 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64 (%0*)*, i64 (%0*)** [[TMP6]], i64 6 +; CHECK-NEXT: [[TMP8:%.*]] = load i64 (%0*)*, i64 (%0*)** [[TMP7]], align 8, !invariant.load !6 +; CHECK-NEXT: [[TMP9:%.*]] = tail call i64 [[TMP8]](%0* [[TMP1]]) #1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[TMP1]], %1* [[TMP2:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 +; CHECK-NEXT: store i8 0, i8* [[TMP11]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 (%0*)** [[TMP6]] to i64 (%0*, i8*, i64)** +; CHECK-NEXT: br i1 undef +; CHECK: [[TMP14:%.*]] = bitcast %0* [[TMP1]] to i64 (%0*, i8*, i64)*** +; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds i64 (%0*, i8*, i64)*, i64 (%0*, i8*, i64)** [[TMP12]], i64 22 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i64 (%0*, i8*, i64)*, i64 (%0*, i8*, i64)** [[DOTPHI_TRANS_INSERT]], align 8, !invariant.load !6 +; CHECK-NEXT: br label [[TMP15:%.*]] +; CHECK: [[TMP16:%.*]] = call i64 [[DOTPRE]](%0* nonnull [[TMP1]], i8* null, i64 0) #1 + + %5 = bitcast %0* %1 to i64 (%0*)*** + %6 = load i64 (%0*)**, i64 (%0*)*** %5, align 8, !invariant.group !6 + %7 = getelementptr inbounds i64 (%0*)*, i64 (%0*)** %6, i64 6 + %8 = load i64 (%0*)*, i64 (%0*)** %7, align 8, !invariant.load !6 + %9 = tail call i64 %8(%0* %1) #1 + %10 = getelementptr inbounds %1, %1* 
%2, i64 0, i32 0, i32 0, i32 0, i32 0 + %11 = load i8*, i8** %10, align 8 + store i8 0, i8* %11, align 1 + br i1 undef, label %12, label %31 + +;