diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -47,6 +47,7 @@ class PredIteratorCache; class ScalarEvolution; class SCEV; +class SCEVExpander; class TargetLibraryInfo; class TargetTransformInfo; @@ -357,6 +358,18 @@ bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed); +enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; + +/// If the final value of any expressions that are recurrent in the loop can +/// be computed, substitute the exit values from the loop into any instructions +/// outside of the loop that use the final values of the current expressions. +/// Return the number of loop exit values that have been replaced, and the +/// corresponding phi node will be added to DeadInsts. +int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, + ScalarEvolution *SE, SCEVExpander &Rewriter, + DominatorTree *DT, ReplaceExitVal ReplaceExitValue, + SmallVector &DeadInsts); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -103,8 +103,6 @@ "verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars")); -enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; - static cl::opt ReplaceExitValue( "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), @@ -143,8 +141,6 @@ SmallVector DeadInsts; - bool isValidRewrite(Value *FromVal, Value *ToVal); - bool handleFloatingPointIV(Loop *L, PHINode *PH); bool rewriteNonIntegerIVs(Loop *L); @@ -155,10 +151,7 @@ /// iterations of the loop run when that is unobservable. bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); - bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); - bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); bool rewriteFirstIterationLoopExitValues(Loop *L); - bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, @@ -177,58 +170,6 @@ } // end anonymous namespace -/// Return true if the SCEV expansion generated by the rewriter can replace the -/// original value. SCEV guarantees that it produces the same value, but the way -/// it is produced may be illegal IR. Ideally, this function will only be -/// called for verification. -bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { - // If an SCEV expression subsumed multiple pointers, its expansion could - // reassociate the GEP changing the base pointer. This is illegal because the - // final address produced by a GEP chain must be inbounds relative to its - // underlying object. Otherwise basic alias analysis, among other things, - // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid - // producing an expression involving multiple pointers. Until then, we must - // bail out here. - // - // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject - // because it understands lcssa phis while SCEV does not. - Value *FromPtr = FromVal; - Value *ToPtr = ToVal; - if (auto *GEP = dyn_cast(FromVal)) { - FromPtr = GEP->getPointerOperand(); - } - if (auto *GEP = dyn_cast(ToVal)) { - ToPtr = GEP->getPointerOperand(); - } - if (FromPtr != FromVal || ToPtr != ToVal) { - // Quickly check the common case - if (FromPtr == ToPtr) - return true; - - // SCEV may have rewritten an expression that produces the GEP's pointer - // operand. That's ok as long as the pointer operand has the same base - // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the - // base of a recurrence. This handles the case in which SCEV expansion - // converts a pointer type recurrence into a nonrecurrent pointer base - // indexed by an integer recurrence. - - // If the GEP base pointer is a vector of pointers, abort. - if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) - return false; - - const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); - const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); - if (FromBase == ToBase) - return true; - - LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase - << " != " << *ToBase << "\n"); - - return false; - } - return true; -} - /// Determine the insertion point for this user. By default, insert immediately /// before the user. SCEVExpander or LICM will hoist loop invariants out of the /// loop. For PHI nodes, there may be multiple uses, so compute the nearest @@ -522,222 +463,6 @@ return Changed; } -namespace { - -// Collect information about PHI nodes which can be transformed in -// rewriteLoopExitValues. -struct RewritePhi { - PHINode *PN; - - // Ith incoming value. - unsigned Ith; - - // Exit value after expansion. - Value *Val; - - // High Cost when expansion. - bool HighCost; - - RewritePhi(PHINode *P, unsigned I, Value *V, bool H) - : PN(P), Ith(I), Val(V), HighCost(H) {} -}; - -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// rewriteLoopExitValues - Optimize IV users outside the loop. -// As a side effect, reduces the amount of IV processing within the loop. -//===----------------------------------------------------------------------===// - -bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const { - SmallPtrSet Visited; - SmallVector WorkList; - Visited.insert(I); - WorkList.push_back(I); - while (!WorkList.empty()) { - const Instruction *Curr = WorkList.pop_back_val(); - // This use is outside the loop, nothing to do. - if (!L->contains(Curr)) - continue; - // Do we assume it is a "hard" use which will not be eliminated easily? - if (Curr->mayHaveSideEffects()) - return true; - // Otherwise, add all its users to worklist. - for (auto U : Curr->users()) { - auto *UI = cast(U); - if (Visited.insert(UI).second) - WorkList.push_back(UI); - } - } - return false; -} - -/// Check to see if this loop has a computable loop-invariant execution count. -/// If so, this means that we can compute the final value of any expressions -/// that are recurrent in the loop, and substitute the exit values from the loop -/// into any instructions outside of the loop that use the final values of the -/// current expressions. -/// -/// This is mostly redundant with the regular IndVarSimplify activities that -/// happen later, except that it's more powerful in some cases, because it's -/// able to brute-force evaluate arbitrary instructions as long as they have -/// constant operands at the beginning of the loop. -bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Check a pre-condition. - assert(L->isRecursivelyLCSSAForm(*DT, *LI) && - "Indvars did not preserve LCSSA!"); - - SmallVector ExitBlocks; - L->getUniqueExitBlocks(ExitBlocks); - - SmallVector RewritePhiSet; - // Find all values that are computed inside the loop, but used outside of it. - // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan - // the exit blocks of the loop to find them. - for (BasicBlock *ExitBB : ExitBlocks) { - // If there are no PHI nodes in this exit block, then no values defined - // inside the loop are used on this path, skip it. - PHINode *PN = dyn_cast(ExitBB->begin()); - if (!PN) continue; - - unsigned NumPreds = PN->getNumIncomingValues(); - - // Iterate over all of the PHI nodes. - BasicBlock::iterator BBI = ExitBB->begin(); - while ((PN = dyn_cast(BBI++))) { - if (PN->use_empty()) - continue; // dead use, don't replace it - - if (!SE->isSCEVable(PN->getType())) - continue; - - // It's necessary to tell ScalarEvolution about this explicitly so that - // it can walk the def-use list and forget all SCEVs, as it may not be - // watching the PHI itself. Once the new exit value is in place, there - // may not be a def-use connection between the loop and every instruction - // which got a SCEVAddRecExpr for that loop. - SE->forgetValue(PN); - - // Iterate over all of the values in all the PHI nodes. - for (unsigned i = 0; i != NumPreds; ++i) { - // If the value being merged in is not integer or is not defined - // in the loop, skip it. - Value *InVal = PN->getIncomingValue(i); - if (!isa(InVal)) - continue; - - // If this pred is for a subloop, not L itself, skip it. - if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) - continue; // The Block is in a subloop, skip it. - - // Check that InVal is defined in the loop. - Instruction *Inst = cast(InVal); - if (!L->contains(Inst)) - continue; - - // Okay, this instruction has a user outside of the current loop - // and varies predictably *inside* the loop. Evaluate the value it - // contains when the loop exits, if possible. We prefer to start with - // expressions which are true for all exits (so as to maximize - // expression reuse by the SCEVExpander), but resort to per-exit - // evaluation if that fails. - const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (isa(ExitValue) || - !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) { - // TODO: This should probably be sunk into SCEV in some way; maybe a - // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for - // most SCEV expressions and other recurrence types (e.g. shift - // recurrences). Is there existing code we can reuse? - const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); - if (isa(ExitCount)) - continue; - if (auto *AddRec = dyn_cast(SE->getSCEV(Inst))) - if (AddRec->getLoop() == L) - ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); - if (isa(ExitValue) || - !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) - continue; - } - - // Computing the value outside of the loop brings no benefit if it is - // definitely used inside the loop in a way which can not be optimized - // away. Avoid doing so unless we know we have a value which computes - // the ExitValue already. TODO: This should be merged into SCEV - // expander to leverage its knowledge of existing expressions. - if (ReplaceExitValue != AlwaysRepl && - !isa(ExitValue) && !isa(ExitValue) && - hasHardUserWithinLoop(L, Inst)) - continue; - - bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - - LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal - << '\n' - << " LoopVal = " << *Inst << "\n"); - - if (!isValidRewrite(Inst, ExitVal)) { - DeadInsts.push_back(ExitVal); - continue; - } - -#ifndef NDEBUG - // If we reuse an instruction from a loop which is neither L nor one of - // its containing loops, we end up breaking LCSSA form for this loop by - // creating a new use of its instruction. - if (auto *ExitInsn = dyn_cast(ExitVal)) - if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) - if (EVL != L) - assert(EVL->contains(L) && "LCSSA breach detected!"); -#endif - - // Collect all the candidate PHINodes to be rewritten. - RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost); - } - } - } - - bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); - - bool Changed = false; - // Transformation. - for (const RewritePhi &Phi : RewritePhiSet) { - PHINode *PN = Phi.PN; - Value *ExitVal = Phi.Val; - - // Only do the rewrite when the ExitValue can be expanded cheaply. - // If LoopCanBeDel is true, rewrite exit value aggressively. - if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { - DeadInsts.push_back(ExitVal); - continue; - } - - Changed = true; - ++NumReplaced; - Instruction *Inst = cast(PN->getIncomingValue(Phi.Ith)); - PN->setIncomingValue(Phi.Ith, ExitVal); - - // If this instruction is dead now, delete it. Don't do it now to avoid - // invalidating iterators. - if (isInstructionTriviallyDead(Inst, TLI)) - DeadInsts.push_back(Inst); - - // Replace PN with ExitVal if that is legal and does not break LCSSA. - if (PN->getNumIncomingValues() == 1 && - LI->replacementPreservesLCSSAForm(PN, ExitVal)) { - PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); - } - } - - // The insertion point instruction may have been deleted; clear it out - // so that the rewriter doesn't trip over it later. - Rewriter.clearInsertPoint(); - return Changed; -} - //===---------------------------------------------------------------------===// // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know // they will exit at the first iteration. @@ -813,61 +538,6 @@ return MadeAnyChanges; } -/// Check whether it is possible to delete the loop after rewriting exit -/// value. If it is possible, ignore ReplaceExitValue and do rewriting -/// aggressively. -bool IndVarSimplify::canLoopBeDeleted( - Loop *L, SmallVector &RewritePhiSet) { - BasicBlock *Preheader = L->getLoopPreheader(); - // If there is no preheader, the loop will not be deleted. - if (!Preheader) - return false; - - // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. - // We obviate multiple ExitingBlocks case for simplicity. - // TODO: If we see testcase with multiple ExitingBlocks can be deleted - // after exit value rewriting, we can enhance the logic here. - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - SmallVector ExitBlocks; - L->getUniqueExitBlocks(ExitBlocks); - if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) - return false; - - BasicBlock *ExitBlock = ExitBlocks[0]; - BasicBlock::iterator BI = ExitBlock->begin(); - while (PHINode *P = dyn_cast(BI)) { - Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); - - // If the Incoming value of P is found in RewritePhiSet, we know it - // could be rewritten to use a loop invariant value in transformation - // phase later. Skip it in the loop invariant check below. - bool found = false; - for (const RewritePhi &Phi : RewritePhiSet) { - unsigned i = Phi.Ith; - if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { - found = true; - break; - } - } - - Instruction *I; - if (!found && (I = dyn_cast(Incoming))) - if (!L->hasLoopInvariantOperands(I)) - return false; - - ++BI; - } - - for (auto *BB : L->blocks()) - if (llvm::any_of(*BB, [](Instruction &I) { - return I.mayHaveSideEffects(); - })) - return false; - - return true; -} - //===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// @@ -3016,8 +2686,13 @@ // that are recurrent in the loop, and substitute the exit values from the // loop into any instructions outside of the loop that use the final values // of the current expressions. - if (ReplaceExitValue != NeverRepl) - Changed |= rewriteLoopExitValues(L, Rewriter); + if (ReplaceExitValue != NeverRepl) { + if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, + ReplaceExitValue, DeadInsts)) { + NumReplaced += Rewrites; + Changed = true; + } + } // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); @@ -3028,7 +2703,7 @@ // Given we've changed exit counts, notify SCEV SE->forgetLoop(L); } - + // Try to form loop invariant tests for loop exits by changing how many // iterations of the loop run when that is unobservable. if (predicateLoopExits(L, Rewriter)) { @@ -3038,7 +2713,7 @@ } // If we have a trip count expression, rewrite the loop's exit condition - // using it. + // using it. if (!DisableLFTR) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -3049,10 +2724,10 @@ // If our exitting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost - // loop runs before it exits. + // loop runs before it exits. if (LI->getLoopFor(ExitingBB) != L) continue; - + if (!needsLFTR(L, ExitingBB)) continue; @@ -3066,13 +2741,13 @@ // until stable to handle cases like this better. if (ExitCount->isZero()) continue; - + PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); if (!IndVar) continue; - + // Avoid high cost expansions. Note: This heuristic is questionable in - // that our definition of "high cost" is not exactly principled. + // that our definition of "high cost" is not exactly principled. if (Rewriter.isHighCostExpansion(ExitCount, L)) continue; @@ -3081,7 +2756,7 @@ // any pass that uses the SCEVExpander must do it. This does not work // well for loop passes because SCEVExpander makes assumptions about // all loops, while LoopPassManager only forces the current loop to be - // simplified. + // simplified. // // FIXME: SCEV expansion has no way to bail out, so the caller must // explicitly check any assumptions made by SCEV. Brittle. diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -39,6 +40,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -1042,3 +1044,307 @@ SE.isLoopEntryGuardedByCond(L, Predicate, S, SE.getConstant(Max)); } + +//===----------------------------------------------------------------------===// +// rewriteLoopExitValues - Optimize IV users outside the loop. +// As a side effect, reduces the amount of IV processing within the loop. +//===----------------------------------------------------------------------===// + +// Return true if the SCEV expansion generated by the rewriter can replace the +// original value. SCEV guarantees that it produces the same value, but the way +// it is produced may be illegal IR. Ideally, this function will only be +// called for verification. +static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (auto *GEP = dyn_cast(FromVal)) + FromPtr = GEP->getPointerOperand(); + + if (auto *GEP = dyn_cast(ToVal)) + ToPtr = GEP->getPointerOperand(); + + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + + // If the GEP base pointer is a vector of pointers, abort. + if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) + return false; + + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + +static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) { + SmallPtrSet Visited; + SmallVector WorkList; + Visited.insert(I); + WorkList.push_back(I); + while (!WorkList.empty()) { + const Instruction *Curr = WorkList.pop_back_val(); + // This use is outside the loop, nothing to do. + if (!L->contains(Curr)) + continue; + // Do we assume it is a "hard" use which will not be eliminated easily? + if (Curr->mayHaveSideEffects()) + return true; + // Otherwise, add all its users to worklist. + for (auto U : Curr->users()) { + auto *UI = cast(U); + if (Visited.insert(UI).second) + WorkList.push_back(UI); + } + } + return false; +} + +// Collect information about PHI nodes which can be transformed in +// rewriteLoopExitValues. +struct RewritePhi { + PHINode *PN; + unsigned Ith; // Ith incoming value. + Value *Val; // Exit value after expansion. + bool HighCost; // High Cost when expansion. + + RewritePhi(PHINode *P, unsigned I, Value *V, bool H) + : PN(P), Ith(I), Val(V), HighCost(H) {} +}; + +// Check whether it is possible to delete the loop after rewriting exit +// value. If it is possible, ignore ReplaceExitValue and do rewriting +// aggressively. +static bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet) { + BasicBlock *Preheader = L->getLoopPreheader(); + // If there is no preheader, the loop will not be deleted. + if (!Preheader) + return false; + + // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. + // We obviate multiple ExitingBlocks case for simplicity. + // TODO: If we see testcase with multiple ExitingBlocks can be deleted + // after exit value rewriting, we can enhance the logic here. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) + return false; + + BasicBlock *ExitBlock = ExitBlocks[0]; + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast(BI)) { + Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + + // If the Incoming value of P is found in RewritePhiSet, we know it + // could be rewritten to use a loop invariant value in transformation + // phase later. Skip it in the loop invariant check below. + bool found = false; + for (const RewritePhi &Phi : RewritePhiSet) { + unsigned i = Phi.Ith; + if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { + found = true; + break; + } + } + + Instruction *I; + if (!found && (I = dyn_cast(Incoming))) + if (!L->hasLoopInvariantOperands(I)) + return false; + + ++BI; + } + + for (auto *BB : L->blocks()) + if (llvm::any_of(*BB, [](Instruction &I) { + return I.mayHaveSideEffects(); + })) + return false; + + return true; +} + +int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, + TargetLibraryInfo *TLI, ScalarEvolution *SE, SCEVExpander &Rewriter, + DominatorTree *DT, ReplaceExitVal ReplaceExitValue, + SmallVector &DeadInsts) { + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT, *LI) && + "Indvars did not preserve LCSSA!"); + + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + + SmallVector RewritePhiSet; + // Find all values that are computed inside the loop, but used outside of it. + // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan + // the exit blocks of the loop to find them. + for (BasicBlock *ExitBB : ExitBlocks) { + // If there are no PHI nodes in this exit block, then no values defined + // inside the loop are used on this path, skip it. + PHINode *PN = dyn_cast(ExitBB->begin()); + if (!PN) continue; + + unsigned NumPreds = PN->getNumIncomingValues(); + + // Iterate over all of the PHI nodes. + BasicBlock::iterator BBI = ExitBB->begin(); + while ((PN = dyn_cast(BBI++))) { + if (PN->use_empty()) + continue; // dead use, don't replace it + + if (!SE->isSCEVable(PN->getType())) + continue; + + // It's necessary to tell ScalarEvolution about this explicitly so that + // it can walk the def-use list and forget all SCEVs, as it may not be + // watching the PHI itself. Once the new exit value is in place, there + // may not be a def-use connection between the loop and every instruction + // which got a SCEVAddRecExpr for that loop. + SE->forgetValue(PN); + + // Iterate over all of the values in all the PHI nodes. + for (unsigned i = 0; i != NumPreds; ++i) { + // If the value being merged in is not integer or is not defined + // in the loop, skip it. + Value *InVal = PN->getIncomingValue(i); + if (!isa(InVal)) + continue; + + // If this pred is for a subloop, not L itself, skip it. + if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) + continue; // The Block is in a subloop, skip it. + + // Check that InVal is defined in the loop. + Instruction *Inst = cast(InVal); + if (!L->contains(Inst)) + continue; + + // Okay, this instruction has a user outside of the current loop + // and varies predictably *inside* the loop. Evaluate the value it + // contains when the loop exits, if possible. We prefer to start with + // expressions which are true for all exits (so as to maximize + // expression reuse by the SCEVExpander), but resort to per-exit + // evaluation if that fails. + const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) { + // TODO: This should probably be sunk into SCEV in some way; maybe a + // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for + // most SCEV expressions and other recurrence types (e.g. shift + // recurrences). Is there existing code we can reuse? + const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); + if (isa(ExitCount)) + continue; + if (auto *AddRec = dyn_cast(SE->getSCEV(Inst))) + if (AddRec->getLoop() == L) + ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) + continue; + } + + // Computing the value outside of the loop brings no benefit if it is + // definitely used inside the loop in a way which can not be optimized + // away. Avoid doing so unless we know we have a value which computes + // the ExitValue already. TODO: This should be merged into SCEV + // expander to leverage its knowledge of existing expressions. + if (ReplaceExitValue != AlwaysRepl && + !isa(ExitValue) && !isa(ExitValue) && + hasHardUserWithinLoop(L, Inst)) + continue; + + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); + + LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " + << *ExitVal << '\n' << " LoopVal = " << *Inst + << "\n"); + + if (!isValidRewrite(SE, Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + +#ifndef NDEBUG + // If we reuse an instruction from a loop which is neither L nor one of + // its containing loops, we end up breaking LCSSA form for this loop by + // creating a new use of its instruction. + if (auto *ExitInsn = dyn_cast(ExitVal)) + if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) + if (EVL != L) + assert(EVL->contains(L) && "LCSSA breach detected!"); +#endif + + // Collect all the candidate PHINodes to be rewritten. + RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost); + } + } + } + + bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); + int NumReplaced = 0; + + // Transformation. + for (const RewritePhi &Phi : RewritePhiSet) { + PHINode *PN = Phi.PN; + Value *ExitVal = Phi.Val; + + // Only do the rewrite when the ExitValue can be expanded cheaply. + // If LoopCanBeDel is true, rewrite exit value aggressively. + if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { + DeadInsts.push_back(ExitVal); + continue; + } + + NumReplaced++; + Instruction *Inst = cast(PN->getIncomingValue(Phi.Ith)); + PN->setIncomingValue(Phi.Ith, ExitVal); + + // If this instruction is dead now, delete it. Don't do it now to avoid + // invalidating iterators. + if (isInstructionTriviallyDead(Inst, TLI)) + DeadInsts.push_back(Inst); + + // Replace PN with ExitVal if that is legal and does not break LCSSA. + if (PN->getNumIncomingValues() == 1 && + LI->replacementPreservesLCSSAForm(PN, ExitVal)) { + PN->replaceAllUsesWith(ExitVal); + PN->eraseFromParent(); + } + } + + // The insertion point instruction may have been deleted; clear it out + // so that the rewriter doesn't trip over it later. + Rewriter.clearInsertPoint(); + return NumReplaced; +}