Index: llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h =================================================================== --- llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h +++ llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h @@ -43,6 +43,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -141,8 +142,12 @@ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where); MemoryAccess *getPreviousDef(MemoryAccess *); MemoryAccess *getPreviousDefInBlock(MemoryAccess *); - MemoryAccess *getPreviousDefFromEnd(BasicBlock *); - MemoryAccess *getPreviousDefRecursive(BasicBlock *); + MemoryAccess * + getPreviousDefFromEnd(BasicBlock *, + DenseMap> &); + MemoryAccess * + getPreviousDefRecursive(BasicBlock *, + DenseMap> &); MemoryAccess *recursePhi(MemoryAccess *Phi); template MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); Index: llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp +++ llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp @@ -37,15 +37,26 @@ // that there are two or more definitions needing to be merged. // This still will leave non-minimal form in the case of irreducible control // flow, where phi nodes may be in cycles with themselves, but unnecessary. -MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { - // Single predecessor case, just recurse, we can only have one definition. - if (BasicBlock *Pred = BB->getSinglePredecessor()) { - return getPreviousDefFromEnd(Pred); +MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( + BasicBlock *BB, + DenseMap> &CachedPreviousDef) { + // First, do a cache lookup. Without this cache, certain CFG structures + // (like a series of if statements) take exponential time to visit. + auto Cached = CachedPreviousDef.find(BB); + if (Cached != CachedPreviousDef.end()) { + return Cached->second; + } else if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // Single predecessor case, just recurse, we can only have one definition. + MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); + CachedPreviousDef.insert({BB, Result}); + return Result; } else if (VisitedBlocks.count(BB)) { // We hit our node again, meaning we had a cycle, we must insert a phi // node to break it so we have an operand. The only case this will // insert useless phis is if we have irreducible control flow. - return MSSA->createMemoryPhi(BB); + MemoryAccess *Result = MSSA->createMemoryPhi(BB); + CachedPreviousDef.insert({BB, Result}); + return Result; } else if (VisitedBlocks.insert(BB).second) { // Mark us visited so we can detect a cycle SmallVector PhiOps; @@ -54,7 +65,7 @@ // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. for (auto *Pred : predecessors(BB)) - PhiOps.push_back(getPreviousDefFromEnd(Pred)); + PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -90,6 +101,7 @@ // Set ourselves up for the next variable by resetting visited state. VisitedBlocks.erase(BB); + CachedPreviousDef.insert({BB, Result}); return Result; } llvm_unreachable("Should have hit one of the three cases above"); @@ -100,9 +112,10 @@ // it continues globally, creating phi nodes to ensure we have a single // definition. MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) { - auto *LocalResult = getPreviousDefInBlock(MA); - - return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock()); + if (auto *LocalResult = getPreviousDefInBlock(MA)) + return LocalResult; + DenseMap> CachedPreviousDef; + return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef); } // This starts at the memory access, and goes backwards in the block to the find @@ -133,13 +146,15 @@ } // This starts at the end of block -MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) { +MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd( + BasicBlock *BB, + DenseMap> &CachedPreviousDef) { auto *Defs = MSSA->getWritableBlockDefs(BB); if (Defs) return &*Defs->rbegin(); - return getPreviousDefRecursive(BB); + return getPreviousDefRecursive(BB, CachedPreviousDef); } // Recurse over a set of phi uses to eliminate the trivial ones MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { @@ -230,10 +245,8 @@ InsertedPHIs.clear(); // See if we had a local def, and if not, go hunting. - MemoryAccess *DefBefore = getPreviousDefInBlock(MD); - bool DefBeforeSameBlock = DefBefore != nullptr; - if (!DefBefore) - DefBefore = getPreviousDefRecursive(MD->getBlock()); + MemoryAccess *DefBefore = getPreviousDef(MD); + bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there Index: llvm/trunk/test/Transforms/GVNHoist/hoist-simplify-phi.ll =================================================================== --- llvm/trunk/test/Transforms/GVNHoist/hoist-simplify-phi.ll +++ llvm/trunk/test/Transforms/GVNHoist/hoist-simplify-phi.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -gvn-hoist -S | FileCheck %s + +; This test is meant to make sure that MemorySSAUpdater works correctly +; in non-trivial cases. + +; CHECK: if.else218: +; CHECK-NEXT: %0 = getelementptr inbounds %s, %s* undef, i32 0, i32 0 +; CHECK-NEXT: %1 = load i32, i32* %0, align 4 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +%s = type { i32, %s**, [3 x i8], i8 } + +define void @test() { +entry: + br label %cond.end118 + +cond.end118: ; preds = %entry + br i1 undef, label %cleanup, label %if.end155 + +if.end155: ; preds = %cond.end118 + br label %while.cond + +while.cond: ; preds = %while.body, %if.end155 + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %while.cond + br label %while.cond + +while.end: ; preds = %while.cond + switch i32 undef, label %if.else218 [ + i32 1, label %cleanup + i32 0, label %if.then174 + ] + +if.then174: ; preds = %while.end + unreachable + +if.else218: ; preds = %while.end + br i1 undef, label %if.then226, label %if.else326 + +if.then226: ; preds = %if.else218 + %size227 = getelementptr inbounds %s, %s* undef, i32 0, i32 0 + %0 = load i32, i32* %size227, align 4 + unreachable + +if.else326: ; preds = %if.else218 + %size330 = getelementptr inbounds %s, %s* undef, i32 0, i32 0 + %1 = load i32, i32* %size330, align 4 + unreachable + +cleanup: ; preds = %while.end, %cond.end118 + ret void +}