Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -752,7 +752,7 @@
 private:
   MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &,
-                                  UpwardsMemoryQuery &, bool);
+                                  UpwardsMemoryQuery &, bool, bool);
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
   bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &,
                                 const MemoryLocation &Loc) const;
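The second bool threaded through UpwardsDFSWalk here is SeenBackedge. The existing flag, FollowingBackedge, only says whether the step currently being taken follows a back-edge; SeenBackedge stays set for the remainder of the walk once any back-edge has been taken. A minimal stand-alone model of how the two flags propagate (plain C++ with placeholder types, not this patch's code; Visited-set handling is elided, so this model would not terminate on a cyclic graph):

    #include <vector>

    struct Node;
    struct Edge {
      Node *Pred;      // an upward def / phi argument
      bool IsBackedge; // stand-in for the DT->dominates() test in the .cpp below
    };
    struct Node { std::vector<Edge> Preds; };

    // FollowingBackedge describes only the edge just taken; SeenBackedge is
    // sticky for the rest of the walk, mirroring `Backedge || SeenBackedge`.
    void walk(Node *N, bool FollowingBackedge, bool SeenBackedge) {
      for (const Edge &E : N->Preds) {
        bool Backedge = !FollowingBackedge && E.IsBackedge;
        walk(E.Pred, Backedge, Backedge || SeenBackedge);
      }
    }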
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -872,11 +872,14 @@
 MemoryAccessPair CachingMemorySSAWalker::UpwardsDFSWalk(
     MemoryAccess *StartingAccess, const MemoryLocation &Loc,
-    UpwardsMemoryQuery &Q, bool FollowingBackedge) {
+    UpwardsMemoryQuery &Q, bool FollowingBackedge, bool SeenBackedge) {
   MemoryAccess *ModifyingAccess = nullptr;
 
   auto DFI = df_begin(StartingAccess);
-  for (auto DFE = df_end(StartingAccess); DFI != DFE;) {
+  // FIXME: Refactor this loop
+  for (auto DFE = df_end(StartingAccess); DFI != DFE; ++DFI) {
+    assert(!ModifyingAccess &&
+           "ModifyingAccess was set, but we're still in the loop?");
     MemoryAccess *CurrAccess = *DFI;
     if (MSSA->isLiveOnEntryDef(CurrAccess))
       return {CurrAccess, Loc};
@@ -894,81 +897,107 @@
       }
     }
 
-    // We need to know whether it is a phi so we can track backedges.
-    // Otherwise, walk all upward defs.
-    if (!isa<MemoryPhi>(CurrAccess)) {
-      ++DFI;
+    // If this isn't a phi, we can continue walking all upward defs.
+    if (!isa<MemoryPhi>(CurrAccess))
       continue;
-    }
 
     // Recurse on PHI nodes, since we need to change locations.
     // TODO: Allow graphtraits on pairs, which would turn this whole function
     // into a normal single depth first walk.
     MemoryAccess *FirstDef = nullptr;
-    DFI = DFI.skipChildren();
     const MemoryAccessPair PHIPair(CurrAccess, Loc);
-    bool VisitedOnlyOne = true;
-    for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end();
-         MPI != MPE; ++MPI) {
-      // Don't follow this path again if we've followed it once
-      if (!Q.Visited.insert(*MPI).second)
-        continue;
-
-      bool Backedge =
-          !FollowingBackedge &&
-          DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock());
-
-      MemoryAccessPair CurrentPair =
-          UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge);
-      // All the phi arguments should reach the same point if we can bypass
-      // this phi. The alternative is that they hit this phi node, which
-      // means we can skip this argument.
-      if (FirstDef && CurrentPair.first != PHIPair.first &&
-          CurrentPair.first != FirstDef) {
-        ModifyingAccess = CurrAccess;
-        break;
+    // If all phis end up pointing to the same place, we can skip this phi.
+    // This loop tries to detect these cases.
+    auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end();
+    for (; MPI != MPE; ++MPI) {
+      MemoryAccess *Clobber;
+      // There's no point in putting liveOnEntry into `Visited`, nor is there
+      // a point in trying to walk it.
+      if (MSSA->isLiveOnEntryDef(MPI->first)) {
+        Clobber = MPI->first;
+      } else {
+        // If we were able to find this in the cache, we can get away without
+        // putting this in the `Visited` map, because we'll never recurse on
+        // it.
+        Clobber = doCacheLookup(MPI->first, Q, MPI->second);
+        if (!Clobber) {
+          bool SeenBefore = !Q.Visited.insert(*MPI).second;
+          // If the cache gives us nothing, we should walk. We need to be
+          // careful if we've seen this before *and* we've seen a back-edge,
+          // because we may have a phi that cycles back to itself. For this
+          // reason, we need to answer conservatively.
+          if (SeenBefore && SeenBackedge)
+            break;
+
+          bool Backedge =
+              !FollowingBackedge &&
+              DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock());
+
+          Clobber = UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge,
+                                   Backedge || SeenBackedge)
+                        .first;
+        }
       }
+      // If we looped back to this phi, no interesting defs happen on any of
+      // the paths we took, so we can safely ignore it.
+      if (Clobber == CurrAccess)
+        continue;
+
       if (!FirstDef)
-        FirstDef = CurrentPair.first;
-      else
-        VisitedOnlyOne = false;
+        FirstDef = Clobber;
+      else if (FirstDef != Clobber)
+        break;
     }
 
-    // The above loop determines if all arguments of the phi node reach the
-    // same place. However we skip arguments that are cyclically dependent
-    // only on the value of this phi node. This means in some cases, we may
-    // only visit one argument of the phi node, and the above loop will
-    // happily say that all the arguments are the same. However, in that case,
-    // we still can't walk past the phi node, because that argument still
-    // kills the access unless we hit the top of the function when walking
-    // that argument.
-    if (VisitedOnlyOne && FirstDef && !MSSA->isLiveOnEntryDef(FirstDef))
+#ifndef NDEBUG
+    // The above loop should examine all of the phi's children, so we can skip
+    // them. Because all nodes except Phis only have one edge, skipping the
+    // children of a phi should terminate our DFS.
+    //
+    // Don't call skipChildren directly on the iterator because doing so
+    // deletes the iterator's search stack, and we need that so we can cache
+    // everything.
+    auto DFICopy = DFI;
+    assert(DFICopy.skipChildren() == DFE);
+#endif
+
+    if (MPI != MPE) {
+      // If we didn't examine everything, we need to answer conservatively.
       ModifyingAccess = CurrAccess;
+    } else {
+      // FirstDef can only be null if all "Clobber"s pointed back to this phi,
+      // which would imply we have a loop with no entry. While this isn't a
+      // valid construct, we still need to handle it, because users can ask
+      // the walker to walk arbitrary IR.
+      ModifyingAccess = FirstDef ? FirstDef : MSSA->getLiveOnEntryDef();
+    }
+    break;
   }
 
-  if (!ModifyingAccess)
-    return {MSSA->getLiveOnEntryDef(), Q.StartingLoc};
+  if (!ModifyingAccess) {
+    // If DFI hit the end, it will have 0 path entries. Give up early.
+    assert(DFI == df_end(StartingAccess) &&
+           "Loop should set ModifyingAccess before exiting early.");
+    return {MSSA->getLiveOnEntryDef(), Loc};
+  }
 
-  const BasicBlock *OriginalBlock = Q.OriginalAccess->getBlock();
+  const BasicBlock *OriginalBlock = StartingAccess->getBlock();
   unsigned N = DFI.getPathLength();
-  MemoryAccess *FinalAccess = ModifyingAccess;
   for (; N != 0; --N) {
-    ModifyingAccess = DFI.getPath(N - 1);
-    BasicBlock *CurrBlock = ModifyingAccess->getBlock();
+    MemoryAccess *PathAccess = DFI.getPath(N - 1);
+    BasicBlock *CurrBlock = PathAccess->getBlock();
     if (!FollowingBackedge)
-      doCacheInsert(ModifyingAccess, FinalAccess, Q, Loc);
+      doCacheInsert(PathAccess, ModifyingAccess, Q, Loc);
     if (DT->dominates(CurrBlock, OriginalBlock) &&
         (CurrBlock != OriginalBlock || !FollowingBackedge ||
-         MSSA->locallyDominates(ModifyingAccess, Q.OriginalAccess)))
+         MSSA->locallyDominates(PathAccess, StartingAccess)))
       break;
   }
 
   // Cache everything else on the way back. The caller should cache
   // Q.OriginalAccess for us.
   for (; N != 0; --N) {
-    MemoryAccess *CacheAccess = DFI.getPath(N - 1);
-    doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+    MemoryAccess *PathAccess = DFI.getPath(N - 1);
+    doCacheInsert(PathAccess, ModifyingAccess, Q, Loc);
   }
   assert(Q.Visited.size() < 1000 && "Visited too much");
@@ -982,7 +1011,7 @@
 MemoryAccess *
 CachingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *StartingAccess,
                                                   UpwardsMemoryQuery &Q) {
-  return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false).first;
+  return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false, false).first;
 }
 
 MemoryAccess *
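The heart of the change is the phi handling above: a phi is transparent to the walk only if every incoming def either reaches one common clobber or cycles back to the phi itself. A condensed sketch of just that convergence rule, where findClobber and liveOnEntry stand in for the doCacheLookup/UpwardsDFSWalk pair and MSSA->getLiveOnEntryDef(); this is a model of the logic, not the committed function:

    // Returns what the phi's arguments converge to, or the phi itself when
    // they disagree (the conservative answer).
    MemoryAccess *convergedClobber(MemoryPhi *Phi) {
      MemoryAccess *FirstDef = nullptr;
      for (Use &Arg : Phi->incoming_values()) {
        MemoryAccess *Clobber = findClobber(cast<MemoryAccess>(Arg.get()));
        if (Clobber == Phi)
          continue; // This path cycled back; it contributes no clobber.
        if (!FirstDef)
          FirstDef = Clobber;
        else if (FirstDef != Clobber)
          return Phi; // Paths diverge, so the phi itself is the clobber.
      }
      // Either all paths agreed on FirstDef, or every one of them cycled
      // back (a loop with no entry, which the walker must still tolerate).
      return FirstDef ? FirstDef : liveOnEntry();
    }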
Index: test/Transforms/Util/MemorySSA/cyclicphi.ll
===================================================================
--- test/Transforms/Util/MemorySSA/cyclicphi.ll
+++ test/Transforms/Util/MemorySSA/cyclicphi.ll
@@ -31,3 +31,95 @@
   %tmp79 = getelementptr inbounds i64, i64* %tmp78, i64 undef
   br label %bb26
 }
+
+; CHECK-LABEL: define void @quux_skip
+define void @quux_skip(%struct.hoge* noalias %f, i64* noalias %g) align 2 {
+  %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+  %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+  %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+  br label %bb26
+
+bb26:                                             ; preds = %bb77, %0
+; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK-NEXT: br i1 undef, label %bb68, label %bb77
+  br i1 undef, label %bb68, label %bb77
+
+bb68:                                             ; preds = %bb26
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8
+  %tmp69 = load i64, i64* %g, align 8
+; CHECK: 1 = MemoryDef(3)
+; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8
+  store i64 %tmp69, i64* %g, align 8
+  br label %bb77
+
+bb77:                                             ; preds = %bb68, %bb26
+; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1})
+; NOTE: we're currently a bit too conservative here; this could point to
+; liveOnEntry.
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %tmp78 = load i64*, i64** %tmp25, align 8
+  %tmp78 = load i64*, i64** %tmp25, align 8
+  %tmp79 = getelementptr inbounds i64, i64* %tmp78, i64 undef
+  br label %bb26
+}
+
+; CHECK-LABEL: define void @quux_dominated
+define void @quux_dominated(%struct.hoge* noalias %f, i64* noalias %g) align 2 {
+  %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+  %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+  %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+  br label %bb26
+
+bb26:                                             ; preds = %bb77, %0
+; CHECK: 4 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: load i64*, i64** %tmp25, align 8
+  load i64*, i64** %tmp25, align 8
+  br i1 undef, label %bb68, label %bb77
+
+bb68:                                             ; preds = %bb26
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8
+  %tmp69 = load i64, i64* %g, align 8
+; CHECK: 1 = MemoryDef(4)
+; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8
+  store i64 %tmp69, i64* %g, align 8
+  br label %bb77
+
+bb77:                                             ; preds = %bb68, %bb26
+; CHECK: 3 = MemoryPhi({bb26,4},{bb68,1})
+; CHECK: 2 = MemoryDef(3)
+; CHECK-NEXT: store i64* null, i64** %tmp25, align 8
+  store i64* null, i64** %tmp25, align 8
+  br label %bb26
+}
+
+; CHECK-LABEL: define void @quux_nodominate
+define void @quux_nodominate(%struct.hoge* noalias %f, i64* noalias %g) align 2 {
+  %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+  %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+  %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+  br label %bb26
+
+bb26:                                             ; preds = %bb77, %0
+; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: load i64*, i64** %tmp25, align 8
+  load i64*, i64** %tmp25, align 8
+  br i1 undef, label %bb68, label %bb77
+
+bb68:                                             ; preds = %bb26
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8
+  %tmp69 = load i64, i64* %g, align 8
+; CHECK: 1 = MemoryDef(3)
+; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8
+  store i64 %tmp69, i64* %g, align 8
+  br label %bb77
+
+bb77:                                             ; preds = %bb68, %bb26
+; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1})
+; CHECK-NEXT: br label %bb26
+  br label %bb26
+}
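The three quux tests differ in where the load through %tmp25 sits and whether anything in the loop clobbers it: in quux_dominated the store of null through %tmp25 really does clobber the load, so MemoryUse(4) is the precise answer; in quux_nodominate the walker can look all the way through the cycle to liveOnEntry; in quux_skip nothing clobbers %tmp25 either (the arguments are noalias), but the load sits in bb77, where the walk trips the new SeenBefore/SeenBackedge bail-out, and the NOTE records that the resulting MemoryUse(3) is conservative. The expected output can be regenerated with the same invocation the new test file below uses, assuming cyclicphi.ll carries the matching RUN line outside this hunk:

    opt -basicaa -print-memoryssa -verify-memoryssa -analyze \
        < test/Transforms/Util/MemorySSA/cyclicphi.ll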
Index: test/Transforms/Util/MemorySSA/phi-translation.ll
===================================================================
--- /dev/null
+++ test/Transforms/Util/MemorySSA/phi-translation.ll
@@ -0,0 +1,87 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+
+; %ptr can't alias %local, so we should be able to optimize the use of %local
+; to point to the store to %local.
+; CHECK-LABEL: define void @check
+define void @check(i8* %ptr, i1 %bool) {
+entry:
+  %local = alloca i8, align 1
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %local, align 1
+  store i8 0, i8* %local, align 1
+  br i1 %bool, label %if.then, label %if.end
+
+if.then:
+  %p2 = getelementptr inbounds i8, i8* %ptr, i32 1
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 0, i8* %p2, align 1
+  store i8 0, i8* %p2, align 1
+  br label %if.end
+
+if.end:
+; CHECK: 3 = MemoryPhi({entry,1},{if.then,2})
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i8, i8* %local, align 1
+  load i8, i8* %local, align 1
+  ret void
+}
+
+; This case is a bit more interesting; it exists specifically to test the
+; caching walker's walking algorithm.
+;
+; The phi structure is:
+;
+;   2 -> 3
+;   ^    ^
+;    \  /
+;     1
+;
+; where each number is a phi, each arrow is a directed edge from that phi to
+; another phi, and each phi's number is the order in which the walker visits
+; it.
+;
+; We want to optimize a use that uses phi 1 to use phi 3 instead. This case is
+; interesting because it hits the path where we've already visited the phi and
+; no phi translation has occurred, so we need to fall back to our cache.
+;
+; CHECK-LABEL: define void @check2
+define void @check2(i1 %val1, i1 %val2, i1 %val3) {
+entry:
+  %local = alloca i8, align 1
+  %local2 = alloca i8, align 1
+
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %local
+  store i8 0, i8* %local
+  br i1 %val1, label %if.then, label %phi.3
+
+; We need if.then so phi.3 starts with an actual phi.
+if.then:
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 2, i8* %local2
+  store i8 2, i8* %local2
+  br i1 %val2, label %phi.2, label %phi.3
+
+phi.3:
+; CHECK: 6 = MemoryPhi({entry,1},{if.then,2})
+; CHECK: 3 = MemoryDef(6)
+; CHECK-NEXT: store i8 3, i8* %local2
+  store i8 3, i8* %local2
+  br i1 %val3, label %phi.2, label %phi.1
+
+phi.2:
+; CHECK: 5 = MemoryPhi({if.then,2},{phi.3,3})
+; CHECK: 4 = MemoryDef(5)
+; CHECK-NEXT: store i8 4, i8* %local2
+  store i8 4, i8* %local2
+  br label %phi.1
+
+phi.1:
+; Order matters here: phi.2 needs to come before phi.3, because that's the
+; order they're visited in.
+; CHECK: 7 = MemoryPhi({phi.2,4},{phi.3,3})
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i8, i8* %local
+  load i8, i8* %local
+  ret void
+}
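Roughly, the walk for the final load in check2 starts at phi 7, recurses into phi 5 and from there into phi 6, caching each result on the way back out; when phi 7's second argument leads to phi 6 again, that pair is already in Q.Visited, and it is the cache-first lookup added in this patch that still answers precisely, letting the use optimize to MemoryUse(1) rather than stopping at the phi. The relevant ordering, schematically (arguments elided; this excerpts the loop added above):

    // Consult the cache before Q.Visited; only mark and walk on a cache miss.
    Clobber = doCacheLookup(MPI->first, Q, MPI->second);
    if (!Clobber) {
      bool SeenBefore = !Q.Visited.insert(*MPI).second;
      if (SeenBefore && SeenBackedge)
        break; // Possible self-cycle: answer conservatively.
      Clobber = UpwardsDFSWalk(/* ...walk and fill the cache... */).first;
    }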