Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -146,7 +146,8 @@ bool rewriteFirstIterationLoopExitValues(Loop *L); bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; - bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, + const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); bool sinkUnusedInvariants(Loop *L); @@ -1992,17 +1993,6 @@ if (BackedgeTakenCount->isZero()) return false; - // Loops with multiple exits are not currently suported by lftr - if (!L->getExitingBlock()) - return false; - - // Can't rewrite non-branch yet. - if (!isa(L->getExitingBlock()->getTerminator())) - return false; - - if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L)) - return false; - return true; } @@ -2048,15 +2038,14 @@ /// Given a loop with one backedge and one exit, return the ICmpInst /// controlling the sole loop exit. There is no guarantee that the exiting /// block is also the latch. -static ICmpInst *getLoopTest(Loop *L) { - assert(L->getExitingBlock() && "expected loop exit"); +static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) { BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) return nullptr; - BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); assert(BI && "expected exit branch"); return dyn_cast(BI->getCondition()); @@ -2064,9 +2053,9 @@ /// linearFunctionTestReplace policy. Return true unless we can show that the /// current exit test is already sufficiently canonical. -static bool needsLFTR(Loop *L) { +static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { // Do LFTR to simplify the exit condition to an ICMP. - ICmpInst *Cond = getLoopTest(L); + ICmpInst *Cond = getLoopTest(L, ExitingBB); if (!Cond) return true; @@ -2188,12 +2177,11 @@ /// BECount may be an i8* pointer type. The pointer difference is already /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. -static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, - ScalarEvolution *SE) { +static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, + const SCEV *BECount, ScalarEvolution *SE) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); - Value *Cond = - cast(L->getExitingBlock()->getTerminator())->getCondition(); + Value *Cond = cast(ExitingBB->getTerminator())->getCondition(); // Loop over all of the PHI nodes, looking for a simple counter. PHINode *BestPhi = nullptr; @@ -2226,13 +2214,15 @@ // We explicitly allow unknown phis as long as they are already used by // the loop test. In this case we assume that performing LFTR could not // increase the number of undef users. - if (ICmpInst *Cond = getLoopTest(L)) { - if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) && - Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) { - continue; - } - } + // TODO: Generlize this to allow *any* loop exit which is known to + // execute on each iteration + if (L->getExitingBlock()) + if (ICmpInst *Cond = getLoopTest(L, ExitingBB)) + if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) && + Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) + continue; } + const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { @@ -2261,7 +2251,8 @@ /// Insert an IR expression which computes the value held by the IV IndVar /// (which must be an loop counter w/unit stride) after the backedge of loop L /// is taken IVCount times. -static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, +static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE) { assert(isLoopCounter(IndVar, L, SE)); const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar)); @@ -2284,7 +2275,7 @@ // Expand the code for the iteration count. assert(SE->isLoopInvariant(IVOffset, L) && "Computed iteration count is not loop invariant!"); - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = cast(ExitingBB->getTerminator()); Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI); Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); @@ -2328,7 +2319,7 @@ IVLimit = SE->getAddExpr(IVInit, IVCount); } // Expand the code for the iteration count. - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = cast(ExitingBB->getTerminator()); IRBuilder<> Builder(BI); assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); @@ -2347,7 +2338,8 @@ /// determine a loop-invariant trip count of the loop, which is actually a much /// broader range than just linear tests. bool IndVarSimplify:: -linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, +linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, + const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition"); @@ -2360,7 +2352,7 @@ // If the exiting block is the same as the backedge block, we prefer to // compare against the post-incremented value, otherwise we must compare // against the preincremented value. - if (L->getExitingBlock() == L->getLoopLatch()) { + if (ExitingBB == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // This addition may overflow, which is valid as long as the comparison is // truncated to BackedgeTakenCount->getType(). @@ -2369,16 +2361,16 @@ // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB); } - Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); + Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE); assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"); // Insert a new icmp_ne or icmp_eq instruction before the branch. - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = cast(ExitingBB->getTerminator()); ICmpInst::Predicate P; if (L->contains(BI->getSuccessor(0))) P = ICmpInst::ICMP_NE; @@ -2567,6 +2559,19 @@ return MadeAnyChanges; } +/// If we have a unique latch block which is also an exiting block from the +/// loop, return it. Otherwise, return null. +static BasicBlock* getLoopExitingLatch(Loop *L) { + if (BasicBlock *Latch = L->getLoopLatch()) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBB : ExitingBlocks) + if (ExitingBB == Latch) + return Latch; + } + return nullptr; +} + //===----------------------------------------------------------------------===// // IndVarSimplify driver. Manage several subpasses of IV simplification. //===----------------------------------------------------------------------===// @@ -2624,22 +2629,55 @@ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); // If we have a trip count expression, rewrite the loop's exit condition - // using it. We can currently only handle loops with a single exit. - if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && - needsLFTR(L)) { - PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE); - if (IndVar) { + // using it. + if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter)) { + // For the moment, we only allow when either a) we have a single exit + // loop, or b) we're considering the latch exit of a multiple exit loop. + // TODO: Expand this to all loop exit blocks. + SmallVector ExitingBlocks; + if (auto *ExitingBB = L->getExitingBlock()) + ExitingBlocks.push_back(ExitingBB); + // NOTE TO REVIEWERS: When actually landing, I'm going to commit without + // this next if block (to form an NFC patch), and then land this line and + // the helper function separately. I want any regressions caused to be + // clearly separated between the NFC refactoring and the increase in scope. + if (auto *ExitingBB = getLoopExitingLatch(L)) + ExitingBlocks.push_back(ExitingBB); + for (BasicBlock *ExitingBB : ExitingBlocks) { + // Can't rewrite non-branch yet. + if (!isa(ExitingBB->getTerminator())) + continue; + + if (!needsLFTR(L, ExitingBB)) + continue; + + // Note: Expanding backedge taken count implies ability to get exit + // counts for each exit. + const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB); + assert(BETakenCount && !isa(BETakenCount)); + + PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE); + if (!IndVar) + continue; + + // Avoid high cost expansions. Note: This heuristic is questionable in + // that our definition of "high cost" is not exactly principled. + if (Rewriter.isHighCostExpansion(BETakenCount, L)) + continue; + // Check preconditions for proper SCEVExpander operation. SCEV does not - // express SCEVExpander's dependencies, such as LoopSimplify. Instead any - // pass that uses the SCEVExpander must do it. This does not work well for - // loop passes because SCEVExpander makes assumptions about all loops, - // while LoopPassManager only forces the current loop to be simplified. + // express SCEVExpander's dependencies, such as LoopSimplify. Instead + // any pass that uses the SCEVExpander must do it. This does not work + // well for loop passes because SCEVExpander makes assumptions about + // all loops, while LoopPassManager only forces the current loop to be + // simplified. // // FIXME: SCEV expansion has no way to bail out, so the caller must // explicitly check any assumptions made by SCEV. Brittle. - const SCEVAddRecExpr *AR = dyn_cast(BackedgeTakenCount); + const SCEVAddRecExpr *AR = dyn_cast(BETakenCount); if (!AR || AR->getLoop()->getLoopPreheader()) - Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + Changed |= linearFunctionTestReplace(L, ExitingBB, + BETakenCount, IndVar, Rewriter); } } Index: lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- lib/Transforms/Scalar/LoopPredication.cpp +++ lib/Transforms/Scalar/LoopPredication.cpp @@ -230,7 +230,8 @@ cl::init(true)); namespace { -class LoopPredication { + class LoopPredication { + public: /// Represents an induction variable check: /// icmp Pred, , struct LoopICmp { @@ -246,6 +247,7 @@ << ", Limit = " << *Limit << "\n"; } }; + private: AliasAnalysis *AA; ScalarEvolution *SE; @@ -614,6 +616,21 @@ return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } +static void normalizePredicate(ScalarEvolution *SE, + LoopPredication::LoopICmp& RC) { + // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form. + // Normalize back to the ULT/SLT form for ease of handling. + if (RC.Pred == ICmpInst::ICMP_NE && + RC.IV->getStepRecurrence(*SE)->isOne() && + SE->isKnownPredicate(ICmpInst::ICMP_ULT, RC.IV->getStart(), RC.Limit)) + RC.Pred = ICmpInst::ICMP_ULT; + if (RC.Pred == ICmpInst::ICMP_NE && + RC.IV->getStepRecurrence(*SE)->isOne() && + SE->isKnownPredicate(ICmpInst::ICMP_SLE, RC.IV->getStart(), RC.Limit)) + RC.Pred = ICmpInst::ICMP_SLT; +} + + /// If ICI can be widened to a loop invariant condition emits the loop /// invariant condition in the loop preheader and return it, otherwise /// returns None. @@ -852,6 +869,7 @@ } }; + normalizePredicate(SE, *Result); if (IsUnsupportedPredicate(Step, Result->Pred)) { LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred << ")!\n");