Index: llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp +++ llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp @@ -898,11 +898,22 @@ continue; } +#ifndef NDEBUG + // The loop below visits the phi's children for us. Because phis are the + // only things with multiple edges, skipping the children should always lead + // us to the end of the loop. + // + // Use a copy of DFI because skipChildren would kill our search stack, which + // would make caching anything on the way back impossible. + auto DFICopy = DFI; + assert(DFICopy.skipChildren() == DFE && + "Skipping phi's children doesn't end the DFS?"); +#endif + // Recurse on PHI nodes, since we need to change locations. // TODO: Allow graphtraits on pairs, which would turn this whole function // into a normal single depth first walk. MemoryAccess *FirstDef = nullptr; - DFI = DFI.skipChildren(); const MemoryAccessPair PHIPair(CurrAccess, Loc); bool VisitedOnlyOne = true; for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end(); @@ -932,32 +943,39 @@ VisitedOnlyOne = false; } - // The above loop determines if all arguments of the phi node reach the - // same place. However we skip arguments that are cyclically dependent - // only on the value of this phi node. This means in some cases, we may - // only visit one argument of the phi node, and the above loop will - // happily say that all the arguments are the same. However, in that case, - // we still can't walk past the phi node, because that argument still - // kills the access unless we hit the top of the function when walking - // that argument. - if (VisitedOnlyOne && FirstDef && !MSSA->isLiveOnEntryDef(FirstDef)) - ModifyingAccess = CurrAccess; + // If we exited the loop early, go with the result it gave us. + if (!ModifyingAccess) { + // The above loop determines if all arguments of the phi node reach the + // same place. However we skip arguments that are cyclically dependent + // only on the value of this phi node. This means in some cases, we may + // only visit one argument of the phi node, and the above loop will + // happily say that all the arguments are the same. However, in that case, + // we still can't walk past the phi node, because that argument still + // kills the access unless we hit the top of the function when walking + // that argument. + if (VisitedOnlyOne && !(FirstDef && MSSA->isLiveOnEntryDef(FirstDef))) { + ModifyingAccess = CurrAccess; + } else { + assert(FirstDef && "Visited multiple phis, but FirstDef isn't set?"); + ModifyingAccess = FirstDef; + } + } + break; } if (!ModifyingAccess) return {MSSA->getLiveOnEntryDef(), Q.StartingLoc}; - const BasicBlock *OriginalBlock = Q.OriginalAccess->getBlock(); + const BasicBlock *OriginalBlock = StartingAccess->getBlock(); unsigned N = DFI.getPathLength(); - MemoryAccess *FinalAccess = ModifyingAccess; for (; N != 0; --N) { - ModifyingAccess = DFI.getPath(N - 1); - BasicBlock *CurrBlock = ModifyingAccess->getBlock(); + MemoryAccess *CacheAccess = DFI.getPath(N - 1); + BasicBlock *CurrBlock = CacheAccess->getBlock(); if (!FollowingBackedge) - doCacheInsert(ModifyingAccess, FinalAccess, Q, Loc); + doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc); if (DT->dominates(CurrBlock, OriginalBlock) && (CurrBlock != OriginalBlock || !FollowingBackedge || - MSSA->locallyDominates(ModifyingAccess, Q.OriginalAccess))) + MSSA->locallyDominates(CacheAccess, StartingAccess))) break; } Index: llvm/trunk/test/Transforms/Util/MemorySSA/cyclicphi.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/cyclicphi.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/cyclicphi.ll @@ -31,3 +31,93 @@ %tmp79 = getelementptr inbounds i64, i64* %tmp78, i64 undef br label %bb26 } + +; CHECK-LABEL: define void @quux_skip +define void @quux_skip(%struct.hoge* noalias %f, i64* noalias %g) align 2 { + %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0 + %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1 + %tmp25 = bitcast %struct.widget* %tmp24 to i64** + br label %bb26 + +bb26: ; preds = %bb77, %0 +; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2}) +; CHECK-NEXT: br i1 undef, label %bb68, label %bb77 + br i1 undef, label %bb68, label %bb77 + +bb68: ; preds = %bb26 +; CHECK: MemoryUse(3) +; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8 + %tmp69 = load i64, i64* %g, align 8 +; CHECK: 1 = MemoryDef(3) +; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8 + store i64 %tmp69, i64* %g, align 8 + br label %bb77 + +bb77: ; preds = %bb68, %bb26 +; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1}) +; FIXME: This should be MemoryUse(liveOnEntry) +; CHECK: MemoryUse(2) +; CHECK-NEXT: %tmp78 = load i64*, i64** %tmp25, align 8 + %tmp78 = load i64*, i64** %tmp25, align 8 + br label %bb26 +} + +; CHECK-LABEL: define void @quux_dominated +define void @quux_dominated(%struct.hoge* noalias %f, i64* noalias %g) align 2 { + %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0 + %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1 + %tmp25 = bitcast %struct.widget* %tmp24 to i64** + br label %bb26 + +bb26: ; preds = %bb77, %0 +; CHECK: 4 = MemoryPhi({%0,liveOnEntry},{bb77,2}) +; CHECK: MemoryUse(4) +; CHECK-NEXT: load i64*, i64** %tmp25, align 8 + load i64*, i64** %tmp25, align 8 + br i1 undef, label %bb68, label %bb77 + +bb68: ; preds = %bb26 +; CHECK: MemoryUse(4) +; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8 + %tmp69 = load i64, i64* %g, align 8 +; CHECK: 1 = MemoryDef(4) +; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8 + store i64 %tmp69, i64* %g, align 8 + br label %bb77 + +bb77: ; preds = %bb68, %bb26 +; CHECK: 3 = MemoryPhi({bb26,4},{bb68,1}) +; CHECK: 2 = MemoryDef(3) +; CHECK-NEXT: store i64* null, i64** %tmp25, align 8 + store i64* null, i64** %tmp25, align 8 + br label %bb26 +} + +; CHECK-LABEL: define void @quux_nodominate +define void @quux_nodominate(%struct.hoge* noalias %f, i64* noalias %g) align 2 { + %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0 + %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1 + %tmp25 = bitcast %struct.widget* %tmp24 to i64** + br label %bb26 + +bb26: ; preds = %bb77, %0 +; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2}) +; CHECK: MemoryUse(liveOnEntry) +; CHECK-NEXT: load i64*, i64** %tmp25, align 8 + load i64*, i64** %tmp25, align 8 + br i1 undef, label %bb68, label %bb77 + +bb68: ; preds = %bb26 +; CHECK: MemoryUse(3) +; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8 + %tmp69 = load i64, i64* %g, align 8 +; CHECK: 1 = MemoryDef(3) +; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8 + store i64 %tmp69, i64* %g, align 8 + br label %bb77 + +bb77: ; preds = %bb68, %bb26 +; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1}) +; CHECK-NEXT: br label %bb26 + br label %bb26 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/phi-translation.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/phi-translation.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/phi-translation.ll @@ -0,0 +1,147 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s + +; %ptr can't alias %local, so we should be able to optimize the use of %local to +; point to the store to %local. +; CHECK-LABEL: define void @check +define void @check(i8* %ptr, i1 %bool) { +entry: + %local = alloca i8, align 1 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 0, i8* %local, align 1 + store i8 0, i8* %local, align 1 + br i1 %bool, label %if.then, label %if.end + +if.then: + %p2 = getelementptr inbounds i8, i8* %ptr, i32 1 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store i8 0, i8* %p2, align 1 + store i8 0, i8* %p2, align 1 + br label %if.end + +if.end: +; CHECK: 3 = MemoryPhi({entry,1},{if.then,2}) +; CHECK: MemoryUse(1) +; CHECK-NEXT: load i8, i8* %local, align 1 + load i8, i8* %local, align 1 + ret void +} + +; CHECK-LABEL: define void @check2 +define void @check2(i1 %val1, i1 %val2, i1 %val3) { +entry: + %local = alloca i8, align 1 + %local2 = alloca i8, align 1 + +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 0, i8* %local + store i8 0, i8* %local + br i1 %val1, label %if.then, label %phi.3 + +if.then: +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store i8 2, i8* %local2 + store i8 2, i8* %local2 + br i1 %val2, label %phi.2, label %phi.3 + +phi.3: +; CHECK: 6 = MemoryPhi({entry,1},{if.then,2}) +; CHECK: 3 = MemoryDef(6) +; CHECK-NEXT: store i8 3, i8* %local2 + store i8 3, i8* %local2 + br i1 %val3, label %phi.2, label %phi.1 + +phi.2: +; CHECK: 5 = MemoryPhi({if.then,2},{phi.3,3}) +; CHECK: 4 = MemoryDef(5) +; CHECK-NEXT: store i8 4, i8* %local2 + store i8 4, i8* %local2 + br label %phi.1 + +phi.1: +; Order matters here; phi.2 needs to come before phi.3, because that's the order +; they're visited in. +; CHECK: 7 = MemoryPhi({phi.2,4},{phi.3,3}) +; FIXME: This should be MemoryUse(1) +; CHECK: MemoryUse(7) +; CHECK-NEXT: load i8, i8* %local + load i8, i8* %local + ret void +} + +; CHECK-LABEL: define void @cross_phi +define void @cross_phi(i8* noalias %p1, i8* noalias %p2) { +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 0, i8* %p1 + store i8 0, i8* %p1 +; CHECK: MemoryUse(1) +; CHECK-NEXT: load i8, i8* %p1 + load i8, i8* %p1 + br i1 undef, label %a, label %b + +a: +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store i8 0, i8* %p2 + store i8 0, i8* %p2 + br i1 undef, label %c, label %d + +b: +; CHECK: 3 = MemoryDef(1) +; CHECK-NEXT: store i8 1, i8* %p2 + store i8 1, i8* %p2 + br i1 undef, label %c, label %d + +c: +; CHECK: 6 = MemoryPhi({a,2},{b,3}) +; CHECK: 4 = MemoryDef(6) +; CHECK-NEXT: store i8 2, i8* %p2 + store i8 2, i8* %p2 + br label %e + +d: +; CHECK: 7 = MemoryPhi({a,2},{b,3}) +; CHECK: 5 = MemoryDef(7) +; CHECK-NEXT: store i8 3, i8* %p2 + store i8 3, i8* %p2 + br label %e + +e: +; 8 = MemoryPhi({c,4},{d,5}) +; FIXME: This should be MemoryUse(1) +; CHECK: MemoryUse(8) +; CHECK-NEXT: load i8, i8* %p1 + load i8, i8* %p1 + ret void +} + +; CHECK-LABEL: define void @looped +define void @looped(i8* noalias %p1, i8* noalias %p2) { +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 0, i8* %p1 + store i8 0, i8* %p1 + br label %loop.1 + +loop.1: +; CHECK: 7 = MemoryPhi({%0,1},{loop.3,4}) +; CHECK: 2 = MemoryDef(7) +; CHECK-NEXT: store i8 0, i8* %p2 + store i8 0, i8* %p2 + br i1 undef, label %loop.2, label %loop.3 + +loop.2: +; CHECK: 6 = MemoryPhi({loop.1,2},{loop.3,4}) +; CHECK: 3 = MemoryDef(6) +; CHECK-NEXT: store i8 1, i8* %p2 + store i8 1, i8* %p2 + br label %loop.3 + +loop.3: +; CHECK: 5 = MemoryPhi({loop.1,2},{loop.2,3}) +; CHECK: 4 = MemoryDef(5) +; CHECK-NEXT: store i8 2, i8* %p2 + store i8 2, i8* %p2 +; FIXME: This should be MemoryUse(1) +; CHECK: MemoryUse(5) +; CHECK-NEXT: load i8, i8* %p1 + load i8, i8* %p1 + br i1 undef, label %loop.2, label %loop.1 +}