Index: llvm/lib/Transforms/Scalar/LoopDeletion.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -36,6 +36,10 @@ #define DEBUG_TYPE "loop-delete" STATISTIC(NumDeleted, "Number of loops deleted"); +STATISTIC(NumEarlyExits, "Number of early exits inserted"); +STATISTIC(NumEarlyExitedLoops, "Number of loops with early exit inserted"); +STATISTIC(NumEarlyExitedLoopsMultiExit, + "Number of loops with early exit inserted: multi exit"); static cl::opt EnableSymbolicExecution( "loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true), @@ -56,6 +60,12 @@ return LoopDeletionResult::Unmodified; } +static bool BBHasSideEffects(const BasicBlock *BB) { + return (any_of(*BB, [](const Instruction &I) { + return I.mayHaveSideEffects() && !I.isDroppable(); + })); +} + /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -105,9 +115,7 @@ // This includes instructions that could write to memory, and loads that are // marked volatile. for (auto &I : L->blocks()) - if (any_of(*I, [](Instruction &I) { - return I.mayHaveSideEffects() && !I.isDroppable(); - })) + if (BBHasSideEffects(I)) return false; // The loop or any of its sub-loops looping infinitely is legal. The loop can @@ -503,6 +511,242 @@ return LoopDeletionResult::Deleted; } +// EXPERIMENTAL +// Also handle loops with multiple exits. +static cl::opt EarlyExitExtra("early-exit-extra", cl::init(false), cl::Hidden); + +// The following two functions check if the terminator condition of a BB is +// loop-invariant. If it is not, the edge cannot be redirected since another +// edge may be taken in a following iteration. +static bool usesLoopPHI(const Instruction *I, const Loop *L) { + if (!L->contains(I->getParent())) + return false; + if (isa(I)) + return true; + for (auto &Op : I->operands()) + if (const Instruction *OpI = dyn_cast(&Op)) + if (usesLoopPHI(OpI, L)) + return true; + return false; +} + +static bool BBHasLIVCondOnDeadPath(const BasicBlock *BB, Loop *L) { + // Check if the condition might change after an iteration across a dead + // path. (TODO: worth checking if incoming value from preheader/latch are + // the same and allow that case?) + const Instruction *TI = BB->getTerminator(); + const Instruction *Cond = nullptr; + if (const BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional()) + Cond = dyn_cast(BI->getCondition()); + } else if (const SwitchInst *SI = dyn_cast(TI)) { + Cond = dyn_cast(SI->getCondition()); + } else + return false; + return (Cond == nullptr || !usesLoopPHI(Cond, L)); +} + +// These two functions are used after an early exit was inserted to remove +// any BB that has become disconnected from the loop. +static bool hasPredecessorInLoop(const BasicBlock *BB, Loop *L) { + for (auto *Pred: predecessors(BB)) + if (L->contains(Pred)) + return true; + return false; +} + +static bool hasSuccessorInLoop(const BasicBlock *BB, Loop *L) { + for (auto *Succ: successors(BB)) + if (L->contains(Succ)) + return true; + return false; +} + +// The most simple version of this algorithm would only consider edges +// between the header and latch. By finding more blocks connected to the +// header or latch, more (~ x2) edges can be redirected. For instance, an +// edge from the header to the latch via a third block is dead and can be +// exited if all blocks are free from sideeffects. The "Top" region +// (connected to the header) is found whe Forward is true, otherwise the +// "Bot" region is computed (connected to the latch). +static void findNoSEBlocks(SmallPtrSet &NoSEBBs, + Loop *L, bool Forward) { + // - Demand loop invariant branch conditions in Top to make sure an edge to + // Bot can be broken safely. + // - Allow varying branch conditions in Bot region as a unique exit block + // is checked for later. + auto takeBlock = [&L, &Forward](BasicBlock *BB) -> bool { + return !BBHasSideEffects(BB) && (!Forward || BBHasLIVCondOnDeadPath(BB, L)); + }; + + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Start = Forward ? Header : Latch; + if (!takeBlock(Start)) + return; + NoSEBBs.insert(Start); + + bool Change = true; + while (Change) { + Change = false; + for (auto BB : L->getBlocks()) { + if (NoSEBBs.count(BB) || !takeBlock(BB)) + continue; + + bool All = true; + if (Forward) { + for (auto *Pred: predecessors(BB)) + if (!NoSEBBs.count(Pred)) { + All = false; + break; + } + } else { + for (auto *Succ: successors(BB)) + if (L->contains(Succ) && !NoSEBBs.count(Succ)) { + All = false; + break; + } + } + if (All) { + NoSEBBs.insert(BB); + Change = true; + } + } + } +} + +// Return the unique exit block for BB (in "Bot"). If there is no such block +// for the loop as a whole, it is still possible there is one on the path +// from BB to the backedge (this results in ~10% more loops/edges being +// optimized). If there are more than one, return null. Also make sure there +// are no live-out values. +static BasicBlock *findExitBlock(const BasicBlock *BB, Loop *L) { + BasicBlock *ExitBB = L->getUniqueExitBlock(); + + if (!ExitBB && EarlyExitExtra) { + SmallVector WorkList; + WorkList.push_back(BB); + SmallPtrSet Visited; + while (!WorkList.empty()) { + const BasicBlock *CurrBB = WorkList.pop_back_val(); + if (!Visited.insert(CurrBB).second) + continue; + + const Instruction *TI = CurrBB->getTerminator(); + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) + if (BasicBlock *SuccBB = dyn_cast(TI->getOperand(i))) { + if (!L->contains(SuccBB)) { + if (ExitBB != nullptr && ExitBB != SuccBB) + return nullptr; + ExitBB = SuccBB; + } else if (SuccBB != L->getHeader()){ + WorkList.push_back(SuccBB); + } + } + } + } + + return (ExitBB && !isa(ExitBB->begin())) ? ExitBB : nullptr; +} + +#ifndef NDEBUG +static void dumpBBSet(std::string Msg, SmallPtrSet &S) { + dbgs() << Msg << ": "; + for (const BasicBlock *BB : S) + dbgs() << BB->getName() << ", "; + dbgs() << "\n"; +} +#endif + +// In a loop that is not entirely dead, try to find dead paths that if +// entered are taken in all following iterations and can therefore be exited +// immediately. +static LoopDeletionResult tryInsertEarlyExit(Loop *L, DominatorTree &DT, + ScalarEvolution &SE, LoopInfo &LI, + MemorySSA *MSSA, + OptimizationRemarkEmitter &ORE) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + BasicBlock *LatchBB = L->getLoopLatch(); + if (!LatchBB || L->getNumBlocks() == 1) + return LoopDeletionResult::Unmodified; + + // Check for a known trip count or a forward progress gurantee. + const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L); + if (isa(S) && !LatchBB->getParent()->mustProgress() && + !hasMustProgress(L)) + return LoopDeletionResult::Unmodified; + + // Compute two sets of blocks which are free of side-effets. Top starts + // from header, and Bot starts from latch. + SmallPtrSet Top, Bot; + findNoSEBlocks(Top, L, true /*Forward*/); + findNoSEBlocks(Bot, L, false /*Forward*/); + LLVM_DEBUG(dbgs() << "Trying to insert early exits:\n"; + dumpBBSet("Top region", Top); + dumpBBSet("Bot region", Bot);); + + bool Change = false; + for (auto BB : Top) { + Instruction *TI = BB->getTerminator(); + // We can replace an edge if the conditions will always evaluate the same + // on the dead path from header to BB and with only one possible exit + // block from the target BB in the bottom region. + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) + if (BasicBlock *SuccBB = dyn_cast(TI->getOperand(i))) + if (Bot.count(SuccBB)) + if (BasicBlock *ExitBB = findExitBlock(SuccBB, L)) { + LLVM_DEBUG(dbgs() << "Inserting early exit in " << BB->getName() << ":\n";); + LLVM_DEBUG(TI->dump(); dbgs() << " =>\n";); + TI->setOperand(i, ExitBB); + for (PHINode &Phi : SuccBB->phis()) + Phi.removeIncomingValue(BB); + LLVM_DEBUG(TI->dump();); + Change = true; + NumEarlyExits++; + } + } + + if (!Change) + return LoopDeletionResult::Unmodified; + + NumEarlyExitedLoops++; + if (!L->getUniqueExitBlock()) + NumEarlyExitedLoopsMultiExit++; + + // Update data structures. (Correct? Hopefully there is a better way..?) + SE.forgetLoop(L); + DT.recalculate(*const_cast(LatchBB->getParent())); + Loop *CurrLoop = L; + while (CurrLoop != nullptr) { + // Iteratively remove disconnected blocks from CurrLoop. + Change = true; + while (Change) { + Change = false; + std::vector LoopBlocks(CurrLoop->block_begin(), + CurrLoop->block_end()); + for (auto BB : LoopBlocks) + if (!hasPredecessorInLoop(BB, CurrLoop) || + !hasSuccessorInLoop(BB, CurrLoop)) { + LLVM_DEBUG(dbgs() << "Removing block from loop: " + << BB->getName() << "\n";); + CurrLoop->removeBlockFromLoop(BB); + Change = true; + } + } + CurrLoop = CurrLoop->getParentLoop(); + } + + // If all blocks have been removed, return 'Deleted', or LoopPass will try + // to dump it which doesn't work with an emtpy loop. + LoopDeletionResult Result = L->getNumBlocks() ? LoopDeletionResult::Modified + : LoopDeletionResult::Deleted; + if (Result == LoopDeletionResult::Deleted) { + LI.erase(L); + LLVM_DEBUG(dbgs() << "Loop eliminated: all blocks removed.\n"; ); + } + return Result; +} + PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &Updater) { @@ -523,6 +767,10 @@ Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE)); + if (Result != LoopDeletionResult::Deleted) + Result = merge(Result, tryInsertEarlyExit(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, + ORE)); + if (Result == LoopDeletionResult::Unmodified) return PreservedAnalyses::all(); Index: llvm/test/Transforms/LoopDeletion/early-exits.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopDeletion/early-exits.ll @@ -0,0 +1,482 @@ +; RUN: opt < %s -loop-deletion -early-exit-extra -S | FileCheck %s +; +; Test insertion of early exits from dead paths. + +@g = external global i8 + +; Known trip count. If %loop branches to %latch, the loop is dead. +define void @f0() { +; CHECK-LABEL: @f0( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %exit + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %latch + +body: + call void @foo() + br label %latch + +latch: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Loop has mustprogress attribute and @Rb_tree_increment is readonly. +; If %loop branches to %latch, the loop is dead. +%0 = type { i32, %0*, %0*, %0* } +define void @f1() { +; CHECK-LABEL: @f1( +entry: + br label %loop + +loop: ; preds = %entry, %latch +; CHECK-LABEL: loop: +; CHECK: br i1 %i3.i3.not, label %body, label %exit + %i2.i2 = load i8, i8* inttoptr (i64 8 to i8*) + %0 = and i8 %i2.i2, 1 + %i3.i3.not = icmp eq i8 %0, 0 + br i1 %i3.i3.not, label %body, label %latch + +body: ; preds = %loop + tail call void @foo() + br label %latch + +latch: ; preds = %body, %loop + %i2.i = tail call %0* @Rb_tree_increment() #0 + %i5.i.not = icmp eq %0* %i2.i, null + br i1 %i5.i.not, label %exit, label %loop, !llvm.loop !1 + +exit: ; preds = %latch + ret void +} + +; Header has side-effects +define void @f2() { +; CHECK-LABEL: @f2( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %latch + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load volatile i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %latch + +body: + call void @foo() + br label %latch + +latch: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Latch has side-effects +define void @f3(i64* %dst) { +; CHECK-LABEL: @f3( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %latch + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %latch + +body: + call void @foo() + br label %latch + +latch: + %next = add i64 %IV, 1 + store volatile i64 0, i64* %dst + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Condition is not loop-invariant +define void @f4(i64 %src) { +; CHECK-LABEL: @f4( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %latch + %IV = phi i64 [ %src, %entry ], [ %next, %latch ] + %cmp = icmp eq i64 %IV, 0 + br i1 %cmp, label %body, label %latch + +body: + call void @foo() + br label %latch + +latch: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Header successors: one has side-effects. +define void @f5(i64 %src) { +; CHECK-LABEL: @f5( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %exit, label %body1 + %IV = phi i64 [ %src, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp eq i8 %b, 0 + br i1 %cmp, label %body0, label %body1 + +body0: +; CHECK-LABEL: body0: +; CHECK: br label %exit + br label %latch + +body1: +; CHECK-LABEL: body1: +; CHECK: br i1 %cmp1, label %body2, label %latch + call void @foo() + %cmp1 = icmp ne i8 %b, 2 + br i1 %cmp1, label %body2, label %latch + +body2: +; CHECK-LABEL: body2: +; CHECK: br label %latch + call void @foo() + br label %latch + +latch: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Switch instruction and multiple latches: only early-exit from those with no +; side-effects. +define void @f6(i64 %src) mustprogress { +; CHECK-LABEL: @f6( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: switch i64 %src, label %exit [ +; CHECK-NEXT: i64 2, label %latch2 +; CHECK-NEXT: i64 4, label %exit + %IV = phi i64 [ %src, %entry ], [ %next1, %latch1 ], + [ %next2, %latch2 ], [ %next4, %latch4 ] + switch i64 %src, label %latch1 [ i64 2, label %latch2 + i64 4, label %latch4 ] + +latch1: + %next1 = add i64 %IV, 1 + %cmp1 = icmp ne i64 %IV, 128 + br i1 %cmp1, label %loop, label %exit + +latch2: + call void @foo() + %next2 = add i64 %IV, 2 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +latch4: + %next4 = add i64 %IV, 4 + %cmp4 = icmp ne i64 %IV, 128 + br i1 %cmp4, label %loop, label %exit + +exit: + ret void +} + +; This loop has two exits which means deleteLoopIfDead() will bail. The loop +; can be eliminated after inserting an early exit. +define void @f7(i1 %arg, i1 %arg2) mustprogress { +; CHECK-LABEL: @f7( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %arg, label %exit1, label %exit0 + br i1 %arg, label %exit1, label %body + +body: ; preds = %bb1 + br i1 %arg2, label %exit0, label %latch + +exit0: ; preds = %bb2 + unreachable + +latch: ; preds = %bb2 + br label %loop + +exit1: ; preds = %bb1 + ret void +} + +; Exiting block with an edge to a no-side-effects latch +define void @f8(i64* %dst) { +; CHECK-LABEL: @f8( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %body2 + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %body2 + +body: + call void @foo() + br label %latch + +body2: +; CHECK-LABEL: body2: +; CHECK: br i1 false, label %latch, label %exit + br i1 false, label %latch, label %exit + +latch: + %next = add i64 %IV, 1 + store volatile i64 0, i64* %dst + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Branch from Top (not header) to a Bot (not latch) +define void @f9() { +; CHECK-LABEL: @f9( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %body2 + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %body2 + +body: + call void @foo() + br label %latch + +body2: +; CHECK-LABEL: body2: +; CHECK: br i1 false, label %body, label %exit + br i1 false, label %body, label %bot + +bot: +; CHECK-LABEL: bot: +; CHECK: br label %exit + br label %latch + +latch: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +} + +; Multiple exits, but only one on dead path. +define void @f10() { +; CHECK-LABEL: @f10( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %exit + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %bot1 + +body: + call void @foo() + br label %latch + +bot0: + br i1 false, label %bot1, label %bot2 + +bot1: +; CHECK-LABEL: bot1: +; CHECK: br i1 false, label %exit, label %exit + br i1 false, label %latch, label %exit + +bot2: + br i1 false, label %latch, label %exit2 + +latch: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void + +exit2: + ret void +} + +; Branches to two different blocks in Bot, two different and usable exit blocks. +define void @f11() mustprogress { +; CHECK-LABEL: @f11( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %top1 + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %top1 + +body: + call void @foo() + br label %latch + +top1: +; CHECK-LABEL: top1: +; CHECK: br i1 false, label %exit, label %exit2 + br i1 false, label %bot1, label %bot2 + +bot1: + br i1 false, label %latch, label %exit + +bot2: + br i1 false, label %latch, label %exit2 + +latch: + %next = add i64 %IV, 1 + br label %loop + +exit: + ret void + +exit2: + ret void +} + +; %loop branches to %bot1, but not a single exit block. %bot1 branches to %bot2, +; from wich there is only one exit block. +define void @f12() mustprogress { +; CHECK-LABEL: @f12( +entry: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: br i1 %cmp, label %body, label %bot1 + %IV = phi i64 [ 0, %entry ], [ %next, %latch ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %body, label %bot1 + +body: + call void @foo() + br label %latch + +bot1: +; CHECK-LABEL: bot1: +; CHECK: br i1 false, label %exit2, label %exit + br i1 false, label %bot2, label %exit + +bot2: + br i1 false, label %latch, label %exit2 + +latch: + %next = add i64 %IV, 1 + br label %loop + +exit: + ret void + +exit2: + ret void +} + +; Loop nest +define void @f13() mustprogress { +; CHECK-LABEL: @f13( +entry: + br label %loop1 + +loop1: +; CHECK-LABEL: loop1: +; CHECK: br i1 %cmp, label %loop2.preheader, label %exit1 + %IV = phi i64 [ 0, %entry ], [ %next, %latch1 ] + %b = load i8, i8* @g + %cmp = icmp ne i8 %b, 0 + br i1 %cmp, label %loop2.preheader, label %latch1 + +loop2.preheader: + br label %loop2 + +loop2: +; CHECK-LABEL: loop2: +; CHECK: br i1 false, label %body2, label %exit2 + br i1 false, label %body2, label %latch2 + +body2: + call void @foo() + br label %latch2 + +latch2: + br i1 true, label %loop2, label %exit2 + +exit2: + br label %latch1 + +latch1: + %next = add i64 %IV, 1 + %cmp2 = icmp ne i64 %IV, 128 + br i1 %cmp2, label %loop1, label %exit1 + +exit1: + ret void +} + +declare void @foo() +declare %0* @Rb_tree_increment() +attributes #0 = { nounwind readonly willreturn } +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.mustprogress"} +!3 = !{!"llvm.loop.unroll.disable"} Index: llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll =================================================================== --- llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll +++ llvm/test/Transforms/LoopDeletion/noop-loops-with-subloops.ll @@ -306,7 +306,7 @@ ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop1: ; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV1_NEXT:%.*]], [[LOOP1_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C1:%.*]], label [[LOOP1_LATCH]], label [[LOOP2_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[C1:%.*]], label %exit, label [[LOOP2_PREHEADER:%.*]] ; CHECK: loop2.preheader: ; CHECK-NEXT: br label [[LOOP2:%.*]] ; CHECK: loop2: