Index: include/llvm/IR/BasicBlock.h =================================================================== --- include/llvm/IR/BasicBlock.h +++ include/llvm/IR/BasicBlock.h @@ -208,6 +208,17 @@ return const_cast(this)->getUniquePredecessor(); } + /// \brief Return the successor of this block if it has a unique successor + /// block. Otherwise return a null pointer. + /// + /// Note that unique successor doesn't mean single edge, there can be + /// multiple edges from this block to the unique successor (for example a + /// switch statement with multiple cases having the same destination). + BasicBlock *getUniqueSuccessor(); + const BasicBlock *getUniqueSuccessor() const { + return const_cast(this)->getUniqueSuccessor(); + } + //===--------------------------------------------------------------------===// /// Instruction iterator methods /// Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -4813,6 +4813,36 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { + // Conservatively check if BB dominates the loop header if we ignore the edges + // entering the loop. In other words, given that we are already in the loop, + // does execution of BB guarantee the execution of the loop header? + auto DominatesHeaderIgnoringLoopEntry = [L](BasicBlock *BB) { + // We follow the chain of unique predecessors and successors from BB until + // we reach the header. The header is allowed to have predecessors other + // than the basic block we reached it by as long as those are all outside + // the loop (i.e. entries into the loop). + + BasicBlock *UniqueSucc = BB; + while (true) { + if (std::next(pred_begin(UniqueSucc)) != pred_end(UniqueSucc)) + return false; + + BasicBlock *Next = UniqueSucc->getUniqueSuccessor(); + if (!Next || Next == L->getHeader()) + break; + + UniqueSucc = Next; + } + + if (UniqueSucc->getUniqueSuccessor() != L->getHeader()) + return false; + + return std::all_of(pred_begin(L->getHeader()), pred_end(L->getHeader()), + [UniqueSucc, L](BasicBlock *BB) { + return UniqueSucc == BB || !L->contains(BB); + }); + }; + // Okay, we've chosen an exiting block. See what condition causes us to // exit at this block and remember the exit block and whether all other targets // lead to the loop header. @@ -4824,8 +4854,12 @@ if (Exit) // Multiple exit successors. return getCouldNotCompute(); Exit = *SI; - } else if (*SI != L->getHeader()) { - MustExecuteLoopHeader = false; + } else if (*SI != L->getHeader() && MustExecuteLoopHeader) { + // TODO: DominatesHeaderIgnoringLoopEntry cannot be true for two different + // values for *SI so if it has already succeeded for some block != *SI, + // then we can substitute DominatesHeaderIgnoringLoopEntry(*SI) with + // false. + MustExecuteLoopHeader &= DominatesHeaderIgnoringLoopEntry(*SI); } // At this point, we know we have a conditional branch that determines whether Index: lib/IR/BasicBlock.cpp =================================================================== --- lib/IR/BasicBlock.cpp +++ lib/IR/BasicBlock.cpp @@ -239,6 +239,25 @@ return PredBB; } +/// getUniqueSuccessor - If this basic block has a unique successor block, +/// return the block, otherwise return a null pointer. +/// Note that unique successor doesn't mean single edge, there can be +/// multiple edges from this block to the unique successor (for example +/// a switch statement with multiple cases having the same destination). +BasicBlock *BasicBlock::getUniqueSuccessor() { + succ_iterator SI = succ_begin(this), E = succ_end(this); + if (SI == E) return nullptr; // No successors. + BasicBlock *SuccBB = *SI; + ++SI; + for (;SI != E; ++SI) { + if (*SI != SuccBB) + return nullptr; + // The same successor appears multiple times in the successor list. + // This is OK. + } + return SuccBB; +} + /// removePredecessor - This method is used to notify a BasicBlock that the /// specified Predecessor of the block is no longer able to reach it. This is /// actually not used to update the Predecessor list, but is actually used to Index: test/Analysis/ScalarEvolution/compute-exit-limit-aggressively.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/compute-exit-limit-aggressively.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +define void @f(i1 %c) { +; CHECK-LABEL: Classifying expressions for: @f + entry: + br label %loop + + loop: +; CHECK: Determining loop execution counts for: @f +; CHECK-NEXT: Loop %loop: backedge-taken count is 999 +; CHECK-NEXT: Loop %loop: max backedge-taken count is 999 + + %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop.split ] + br i1 %c, label %left, label %right + +left: + br label %continue +right: + br label %continue + +continue: + %idx.inc = add i32 %idx, 1 + %next.it = icmp slt i32 %idx.inc, 1000 + br i1 %next.it, label %loop.split, label %break + + loop.split: + br label %loop + + break: + ret void +} + +define void @g(i1 %c, i1* %unknown) { +; CHECK-LABEL: Classifying expressions for: @g + entry: + br label %loop + + loop: +; CHECK: Determining loop execution counts for: @g +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable max backedge-taken count. + + %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop.split ], [ %idx.inc, %left ] + %idx.inc = add i32 %idx, 1 + br i1 %c, label %left, label %right + +left: + %cond = load i1* %unknown + br i1 %cond, label %continue, label %loop + +right: + br label %continue + +continue: + %next.it = icmp slt i32 %idx.inc, 1000 + br i1 %next.it, label %loop.split, label %break + + loop.split: + br label %loop + + break: + ret void +} + +define void @h(i1 %c, i1* %unknown) { +; CHECK-LABEL: Classifying expressions for: @h + entry: + br label %loop + + loop: +; CHECK: Determining loop execution counts for: @h +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable max backedge-taken count. + + %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop.split ] + %idx.inc = add i32 %idx, 1 + br i1 %c, label %left, label %right + +left: + %cond = load i1* %unknown + br i1 %cond, label %continue, label %loop.split + +right: + br label %continue + +continue: + %next.it = icmp slt i32 %idx.inc, 1000 + br i1 %next.it, label %loop.split, label %break + + loop.split: + br label %loop + + break: + ret void +}