Index: llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp +++ llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp @@ -1288,6 +1288,7 @@ // Note: Correctness depends on this being initialized to 0, which densemap // does unsigned long LowerBound; + const BasicBlock *LowerBoundBlock; // This is where the last walk for this memory location ended. unsigned long LastKill; bool LastKillValid; @@ -1333,7 +1334,6 @@ VersionStack.pop_back(); ++PopEpoch; } - for (MemoryAccess &MA : *Accesses) { auto *MU = dyn_cast<MemoryUse>(&MA); if (!MU) { @@ -1355,13 +1355,24 @@ if (LocInfo.PopEpoch != PopEpoch) { LocInfo.PopEpoch = PopEpoch; LocInfo.StackEpoch = StackEpoch; - // If the lower bound was in the info we popped, we have to reset it. - if (LocInfo.LowerBound >= VersionStack.size()) { + // If the lower bound was in something that no longer dominates us, we + // have to reset it. + // We can't simply track stack size, because the stack may have had + // pushes/pops in the meantime. + // XXX: This is non-optimal, but it is only slower in cases with heavily + // branching dominator trees. Getting the optimal number of queries would + // require making lowerbound and lastkill a per-loc stack, and popping it until + // the top of that stack dominates us. This does not seem worth it ATM. + // A much cheaper optimization would be to always explore the deepest + // branch of the dominator tree first. This will guarantee this resets on + // the smallest set of blocks. + if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && + !DT->dominates(LocInfo.LowerBoundBlock, BB)){ // Reset the lower bound of things to check. // TODO: Some day we should be able to reset to last kill, rather than // 0. 
- LocInfo.LowerBound = 0; + LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); LocInfo.LastKillValid = false; } } else if (LocInfo.StackEpoch != StackEpoch) { @@ -1437,6 +1448,7 @@ MU->setDefiningAccess(VersionStack[LocInfo.LastKill]); } LocInfo.LowerBound = VersionStack.size() - 1; + LocInfo.LowerBoundBlock = BB; } } Index: llvm/trunk/test/Transforms/Util/MemorySSA/pr28880.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/pr28880.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/pr28880.ll @@ -0,0 +1,51 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s + +; This testcase is reduced from SingleSource/Benchmarks/Misc/fbench.c +; It is testing to make sure that the MemorySSA use optimizer +; comes up with the right answers when dealing with multiple MemoryLocations +; over different blocks. See PR28880 for more details. +@global = external hidden unnamed_addr global double, align 8 +@global.1 = external hidden unnamed_addr global double, align 8 + +; Function Attrs: nounwind ssp uwtable +define hidden fastcc void @hoge() unnamed_addr #0 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb +; These accesses should not conflict. +; CHECK: 1 = MemoryDef(liveOnEntry) +; 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store double undef, double* @global, align 8 + store double undef, double* @global, align 8 +; CHECK: MemoryUse(liveOnEntry) +; MemoryUse(liveOnEntry) +; CHECK-NEXT: %tmp = load double, double* @global.1, align 8 + %tmp = load double, double* @global.1, align 8 + unreachable + +bb2: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb2 + br i1 undef, label %bb4, label %bb6 + +bb4: ; preds = %bb3 +; These accesses should conflict. 
+; CHECK: 2 = MemoryDef(liveOnEntry) +; 2 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store double 0.000000e+00, double* @global.1, align 8 + store double 0.000000e+00, double* @global.1, align 8 +; CHECK: MemoryUse(2) +; MemoryUse(2) +; CHECK-NEXT: %tmp5 = load double, double* @global.1, align 8 + %tmp5 = load double, double* @global.1, align 8 + unreachable + +bb6: ; preds = %bb3 + unreachable +} + +attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } +