Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -146,7 +146,8 @@ bool rewriteFirstIterationLoopExitValues(Loop *L); bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; - bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, + const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); bool sinkUnusedInvariants(Loop *L); @@ -1985,23 +1986,17 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, SCEVExpander &Rewriter) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + dbgs() << *BackedgeTakenCount << "\n"; if (isa(BackedgeTakenCount)) return false; // Better to break the backedge if (BackedgeTakenCount->isZero()) return false; - - // Loops with multiple exits are not currently suported by lftr - if (!L->getExitingBlock()) - return false; - - // Can't rewrite non-branch yet. - if (!isa(L->getExitingBlock()->getTerminator())) - return false; - +#if 0 if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L)) return false; +#endif return true; } @@ -2048,7 +2043,7 @@ /// Given a loop with one backedge and one exit, return the ICmpInst /// controlling the sole loop exit. There is no guarantee that the exiting /// block is also the latch. -static ICmpInst *getLoopTest(Loop *L) { +static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) { assert(L->getExitingBlock() && "expected loop exit"); BasicBlock *LatchBlock = L->getLoopLatch(); @@ -2056,7 +2051,7 @@ if (!LatchBlock) return nullptr; - BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); assert(BI && "expected exit branch"); return dyn_cast(BI->getCondition()); @@ -2065,8 +2060,12 @@ /// linearFunctionTestReplace policy. Return true unless we can show that the /// current exit test is already sufficiently canonical. static bool needsLFTR(Loop *L) { + auto *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + // FIXME + return true; // Do LFTR to simplify the exit condition to an ICMP. - ICmpInst *Cond = getLoopTest(L); + ICmpInst *Cond = getLoopTest(L, ExitingBB); if (!Cond) return true; @@ -2192,7 +2191,7 @@ ScalarEvolution *SE) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); - Value *Cond = + Value *Cond = !L->getExitingBlock() ? nullptr : cast(L->getExitingBlock()->getTerminator())->getCondition(); // Loop over all of the PHI nodes, looking for a simple counter. @@ -2226,13 +2225,15 @@ // We explicitly allow unknown phis as long as they are already used by // the loop test. In this case we assume that performing LFTR could not // increase the number of undef users. - if (ICmpInst *Cond = getLoopTest(L)) { - if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) && - Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) { - continue; - } + if (auto *ExitingBB = L->getExitingBlock()) // FIXME? + if (ICmpInst *Cond = getLoopTest(L, ExitingBB)) { + if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) && + Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) { + continue; + } } } + const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { @@ -2261,7 +2262,8 @@ /// Insert an IR expression which computes the value held by the IV IndVar /// (which must be an loop counter w/unit stride) after the backedge of loop L /// is taken IVCount times. -static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, +static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE) { assert(isLoopCounter(IndVar, L, SE)); const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar)); @@ -2284,7 +2286,7 @@ // Expand the code for the iteration count. assert(SE->isLoopInvariant(IVOffset, L) && "Computed iteration count is not loop invariant!"); - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = cast(ExitingBB->getTerminator()); Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI); Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); @@ -2328,7 +2330,7 @@ IVLimit = SE->getAddExpr(IVInit, IVCount); } // Expand the code for the iteration count. - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = cast(ExitingBB->getTerminator()); IRBuilder<> Builder(BI); assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); @@ -2347,7 +2349,8 @@ /// determine a loop-invariant trip count of the loop, which is actually a much /// broader range than just linear tests. bool IndVarSimplify:: -linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, +linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, + const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition"); @@ -2360,7 +2363,7 @@ // If the exiting block is the same as the backedge block, we prefer to // compare against the post-incremented value, otherwise we must compare // against the preincremented value. - if (L->getExitingBlock() == L->getLoopLatch()) { + if (ExitingBB == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // This addition may overflow, which is valid as long as the comparison is // truncated to BackedgeTakenCount->getType(). @@ -2369,16 +2372,16 @@ // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB); } - Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); + Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE); assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"); // Insert a new icmp_ne or icmp_eq instruction before the branch. - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + BranchInst *BI = cast(ExitingBB->getTerminator()); ICmpInst::Predicate P; if (L->contains(BI->getSuccessor(0))) P = ICmpInst::ICMP_NE; @@ -2623,24 +2626,44 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + dbgs() << "Before LFTR\n"; + dbgs() << canExpandBackedgeTakenCount(L, SE, Rewriter) << needsLFTR(L) << "\n"; // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L)) { + dbgs() << "Running LFTR\n"; PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE); if (IndVar) { - // Check preconditions for proper SCEVExpander operation. SCEV does not - // express SCEVExpander's dependencies, such as LoopSimplify. Instead any - // pass that uses the SCEVExpander must do it. This does not work well for - // loop passes because SCEVExpander makes assumptions about all loops, - // while LoopPassManager only forces the current loop to be simplified. - // - // FIXME: SCEV expansion has no way to bail out, so the caller must - // explicitly check any assumptions made by SCEV. Brittle. - const SCEVAddRecExpr *AR = dyn_cast(BackedgeTakenCount); - if (!AR || AR->getLoop()->getLoopPreheader()) - Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - Rewriter); + dbgs() << *IndVar << "\n"; + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBB : ExitingBlocks) { + // Can't rewrite non-branch yet. + if (!isa(ExitingBB->getTerminator())) + continue; + + // Note: Expanding backedge taken count implies ability to get exit + // counts for each exit. TODO: weaken requirements for transform + const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB); + assert(BETakenCount && !isa(BETakenCount)); + dbgs() << *BETakenCount << "\n"; + + // Check preconditions for proper SCEVExpander operation. SCEV does not + // express SCEVExpander's dependencies, such as LoopSimplify. Instead + // any pass that uses the SCEVExpander must do it. This does not work + // well for loop passes because SCEVExpander makes assumptions about + // all loops, while LoopPassManager only forces the current loop to be + // simplified. + // + // FIXME: SCEV expansion has no way to bail out, so the caller must + // explicitly check any assumptions made by SCEV. Brittle. + const SCEVAddRecExpr *AR = dyn_cast(BETakenCount); + if (!AR || AR->getLoop()->getLoopPreheader()) + Changed |= linearFunctionTestReplace(L, ExitingBB, + BETakenCount, IndVar, + Rewriter); + } } } // Clear the rewriter cache, because values that are in the rewriter's cache