diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h --- a/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Analysis/TargetFolder.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/ValueHandle.h" @@ -171,16 +172,21 @@ ChainedPhis.clear(); } - /// Return true for expressions that may incur non-trivial cost to evaluate - /// at runtime. + /// Return true for expressions that can't be evaluated at runtime + /// within given \b Budget. /// /// At is an optional parameter which specifies point in code where user is /// going to expand this expression. Sometimes this knowledge can lead to a /// more accurate cost estimation. - bool isHighCostExpansion(const SCEV *Expr, Loop *L, + bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Threshold, + const TargetTransformInfo *TTI, const Instruction *At = nullptr) { SmallPtrSet Processed; - return isHighCostExpansionHelper(Expr, L, At, Processed); + int BudgetRemaining = Threshold * TargetTransformInfo::TCC_Basic; + if (isHighCostExpansionHelper(Expr, L, At, BudgetRemaining, TTI, + Processed)) + return true; + return BudgetRemaining < 0; } /// This method returns the canonical induction variable of the specified @@ -322,8 +328,11 @@ LLVMContext &getContext() const { return SE.getContext(); } /// Recursive helper function for isHighCostExpansion. + /// Returns true to indicate that cost computation failed, and the cost is + /// likely high. 
bool isHighCostExpansionHelper(const SCEV *S, Loop *L, - const Instruction *At, + const Instruction *At, int &BudgetRemaining, + const TargetTransformInfo *TTI, SmallPtrSetImpl &Processed); /// Insert the specified binary operator, doing a small amount of work to diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -376,11 +376,15 @@ /// If the final value of any expressions that are recurrent in the loop can /// be computed, substitute the exit values from the loop into any instructions /// outside of the loop that use the final values of the current expressions. +/// If ReplaceExitValue is OnlyCheapRepl, the replacement is only done if the +/// cost of replacement is within the budget ReplaceExitValueBudget. /// Return the number of loop exit values that have been replaced, and the /// corresponding phi node will be added to DeadInsts. 
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, - ScalarEvolution *SE, SCEVExpander &Rewriter, - DominatorTree *DT, ReplaceExitVal ReplaceExitValue, + ScalarEvolution *SE, const TargetTransformInfo *TTI, + SCEVExpander &Rewriter, DominatorTree *DT, + ReplaceExitVal ReplaceExitValue, + unsigned ReplaceExitValueBudget, SmallVector &DeadInsts); /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h --- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -15,6 +15,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/ValueHandle.h" namespace llvm { @@ -46,13 +47,15 @@ /// simplifyUsersOfIV - Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead, + LoopInfo *LI, const TargetTransformInfo *TTI, + SmallVectorImpl &Dead, SCEVExpander &Rewriter, IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs. 
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead); + LoopInfo *LI, const TargetTransformInfo *TTI, + SmallVectorImpl &Dead); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -80,16 +80,17 @@ LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, OptimizationRemarkEmitter *ORE, - bool PreserveLCSSA, Loop **RemainderLoop = nullptr); + AssumptionCache *AC, + const llvm::TargetTransformInfo *TTI, + OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, + Loop **RemainderLoop = nullptr); -bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, - bool AllowExpensiveTripCount, - bool UseEpilogRemainder, bool UnrollRemainder, - bool ForgetAllSCEV, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA, - Loop **ResultLoop = nullptr); +bool UnrollRuntimeLoopRemainder( + Loop *L, unsigned Count, bool AllowExpensiveTripCount, + bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + const TargetTransformInfo *TTI, bool PreserveLCSSA, + Loop **ResultLoop = nullptr); void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, @@ -104,6 +105,7 @@ unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop = nullptr); @@ -121,7 +123,8 @@ void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC); + AssumptionCache *AC, + const TargetTransformInfo *TTI); MDNode 
*GetUnrollMetadata(MDNode *LoopID, StringRef Name); diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2129,84 +2129,142 @@ } bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, const Instruction *At, - SmallPtrSetImpl &Processed) { + const SCEV *S, Loop *L, const Instruction *At, int &BudgetRemaining, + const TargetTransformInfo *TTI, SmallPtrSetImpl &Processed) { + if (BudgetRemaining < 0) + return true; // Already run out of budget, give up. - // If we can find an existing value for this scev available at the point "At" - // then consider the expression cheap. + // Was the cost of expansion of this expression already accounted for? + if (!Processed.insert(S).second) + return false; // We have already accounted for this expression. + + // Can we find an existing value for this scev available at the point "At"? if (At && getRelatedExistingExpansion(S, At, L)) - return false; + return false; // Consider the expression to be free. - // Zero/One operand expressions switch (S->getSCEVType()) { case scUnknown: case scConstant: - return false; - case scTruncate: - return isHighCostExpansionHelper(cast(S)->getOperand(), - L, At, Processed); - case scZeroExtend: - return isHighCostExpansionHelper(cast(S)->getOperand(), - L, At, Processed); - case scSignExtend: - return isHighCostExpansionHelper(cast(S)->getOperand(), - L, At, Processed); + return false; // Assume to be zero-cost. } - if (!Processed.insert(S).second) - return false; + // The rest of the logic is recursive! + if (!TTI) + return true; // No cost model - give up. 
+ + if (auto *CastExpr = dyn_cast(S)) { + unsigned Opcode; + switch (S->getSCEVType()) { + case scTruncate: + Opcode = Instruction::Trunc; + break; + case scZeroExtend: + Opcode = Instruction::ZExt; + break; + case scSignExtend: + Opcode = Instruction::SExt; + break; + default: + llvm_unreachable("There are no other cast types."); + } + const SCEV *Op = CastExpr->getOperand(); + BudgetRemaining -= + TTI->getOperationCost(Opcode, S->getType(), Op->getType()); + return isHighCostExpansionHelper(Op, L, At, BudgetRemaining, TTI, + Processed); + } if (auto *UDivExpr = dyn_cast(S)) { - // If the divisor is a power of two and the SCEV type fits in a native - // integer (and the LHS not expensive), consider the division cheap - // irrespective of whether it occurs in the user code since it can be - // lowered into a right shift. - if (auto *SC = dyn_cast(UDivExpr->getRHS())) + // If the divisor is a power of two and the LHS not expensive, + // consider the division as logical right-shift. + if (auto *SC = dyn_cast(UDivExpr->getRHS())) { if (SC->getAPInt().isPowerOf2()) { - if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed)) + if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, + BudgetRemaining, TTI, Processed) || + isHighCostExpansionHelper(UDivExpr->getRHS(), L, At, + BudgetRemaining, TTI, Processed)) return true; - const DataLayout &DL = - L->getHeader()->getParent()->getParent()->getDataLayout(); - unsigned Width = cast(UDivExpr->getType())->getBitWidth(); - return DL.isIllegalInteger(Width); + BudgetRemaining -= + TTI->getOperationCost(Instruction::LShr, S->getType()); + return BudgetRemaining < 0; } + } // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some - // simple searching, assume the former consider UDivExpr expensive to - // compute. 
+ // simple searching, we need to account for its cost. BasicBlock *ExitingBB = L->getExitingBlock(); - if (!ExitingBB) - return true; + if (At || ExitingBB) { + if (!At) + At = &ExitingBB->back(); - // At the beginning of this function we already tried to find existing value - // for plain 'S'. Now try to lookup 'S + 1' since it is common pattern - // involving division. This is just a simple search heuristic. - if (!At) - At = &ExitingBB->back(); - if (!getRelatedExistingExpansion( - SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) + // At the beginning of this function we already tried to find existing + // value for plain 'S'. Now try to lookup 'S + 1' since it is common + // pattern involving division. This is just a simple search heuristic. + if (getRelatedExistingExpansion( + SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) + return false; + } + + // Need to count the cost of this UDiv. + if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, BudgetRemaining, + TTI, Processed) || + isHighCostExpansionHelper(UDivExpr->getRHS(), L, At, BudgetRemaining, + TTI, Processed)) return true; + BudgetRemaining -= TTI->getOperationCost(Instruction::UDiv, S->getType()); + return BudgetRemaining < 0; } - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa(S)) - return true; - // Recurse past nary expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. 
if (const SCEVNAryExpr *NAry = dyn_cast(S)) { - for (auto *Op : NAry->operands()) - if (isHighCostExpansionHelper(Op, L, At, Processed)) + Type *OpType = NAry->getType(); + + int PairCost; + int CostIncreasePerStep = 0; + switch (S->getSCEVType()) { + case scAddExpr: + PairCost = TTI->getOperationCost(Instruction::Add, OpType); + break; + case scMulExpr: + PairCost = TTI->getOperationCost(Instruction::Mul, OpType); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: + PairCost = TTI->getOperationCost(Instruction::ICmp, OpType) + + TTI->getOperationCost(Instruction::Select, OpType); + break; + case scAddRecExpr: + PairCost = TTI->getOperationCost(Instruction::Mul, OpType) + + TTI->getOperationCost(Instruction::Add, OpType); + CostIncreasePerStep = TTI->getOperationCost(Instruction::Mul, OpType); + break; + default: + llvm_unreachable("There are no other Nary expressions."); + } + + assert(NAry->getNumOperands() > 1 && + "Nary expr should have more than 1 operand."); + for (auto *Op : NAry->operands()) { + if (isHighCostExpansionHelper(Op, L, At, BudgetRemaining, TTI, Processed)) return true; + if (Op == *NAry->op_begin()) + continue; + BudgetRemaining -= PairCost; + if (BudgetRemaining < 0) + return true; + PairCost += CostIncreasePerStep; + } + + return BudgetRemaining < 0; } - // If we haven't recognized an expensive SCEV pattern, assume it's an - // expression produced by program code. - return false; + llvm_unreachable("No other scev expressions possible."); } Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -156,10 +156,11 @@ SmallVector DeadInsts; SCEVExpander Rewriter(*SE, *DL, "mvetp"); ReplaceExitVal ReplaceExitValue = AlwaysRepl; + unsigned ReplaceExitValueBudget = ~0; // Irrelevant, because AlwaysRepl. 
formLCSSARecursively(*L, *DT, LI, SE); - rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, ReplaceExitValue, - DeadInsts); + rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, ReplaceExitValue, + ReplaceExitValueBudget, DeadInsts); } bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -116,6 +116,12 @@ clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible"))); +static cl::opt ReplaceExitValueBudget( + "replace-exit-value-budget", cl::Hidden, cl::init(4), + cl::desc( + "Control the maximal total instruction cost that we are willing to " + "emit to replace the exit value in IndVarSimplify (default = 4)")); + static cl::opt UsePostIncrementRanges( "indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), @@ -1657,8 +1663,8 @@ // Information about sign/zero extensions of CurrIV. IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= - simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); + Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, + &Visitor); if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); @@ -2691,8 +2697,9 @@ // loop into any instructions outside of the loop that use the final values // of the current expressions. if (ReplaceExitValue != NeverRepl) { - if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, - ReplaceExitValue, DeadInsts)) { + if (int Rewrites = rewriteLoopExitValues( + L, LI, TLI, SE, TTI, Rewriter, DT, ReplaceExitValue, + ReplaceExitValueBudget, DeadInsts)) { NumReplaced += Rewrites; Changed = true; } @@ -2750,9 +2757,9 @@ if (!IndVar) continue; - // Avoid high cost expansions. 
Note: This heuristic is questionable in - // that our definition of "high cost" is not exactly principled. - if (Rewriter.isHighCostExpansion(ExitCount, L)) + // Avoid high cost expansions. + if (Rewriter.isHighCostExpansion(ExitCount, L, ReplaceExitValueBudget, + TTI)) continue; // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -388,7 +388,7 @@ Loop *EpilogueOuterLoop = nullptr; LoopUnrollResult UnrollResult = UnrollAndJamLoop( L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI, - &SE, &DT, &AC, &ORE, &EpilogueOuterLoop); + &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop); // Assign new loop attributes. if (EpilogueOuterLoop) { diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1148,7 +1148,7 @@ {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder, ForgetAllSCEV}, - LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop); + LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -166,11 +166,12 @@ /// simplify/dce pass of the instructions. 
void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC) { + AssumptionCache *AC, + const TargetTransformInfo *TTI) { // Simplify any new induction variables in the partially unrolled loop. if (SE && SimplifyIVs) { SmallVector DeadInsts; - simplifyLoopIVs(L, SE, DT, LI, DeadInsts); + simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. @@ -245,6 +246,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { @@ -403,7 +405,7 @@ if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 && !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, ULO.UnrollRemainder, - ULO.ForgetAllSCEV, LI, SE, DT, AC, + ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, PreserveLCSSA, RemainderLoop)) { if (ULO.Force) RuntimeTripCount = false; @@ -865,7 +867,7 @@ // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, - SE, DT, AC); + SE, DT, AC, TTI); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -169,10 +169,12 @@ If EpilogueLoop is non-null, it receives the epilogue loop (if it was necessary to create one and not fully unrolled). 
*/ -LoopUnrollResult llvm::UnrollAndJamLoop( - Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, - bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) { +LoopUnrollResult +llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, bool UnrollRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, const TargetTransformInfo *TTI, + OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) { // When we enter here we should have already checked that it is safe BasicBlock *Header = L->getHeader(); @@ -198,7 +200,7 @@ if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, /*UseEpilogRemainder*/ true, UnrollRemainder, /*ForgetAllSCEV*/ false, - LI, SE, DT, AC, true, EpilogueLoop)) { + LI, SE, DT, AC, TTI, true, EpilogueLoop)) { LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " "generated when assuming runtime trip count\n"); return LoopUnrollResult::Unmodified; @@ -562,8 +564,9 @@ // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. 
- simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC); - simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC); + simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC, + TTI); NumCompletelyUnrolledAndJammed += CompletelyUnroll; ++NumUnrolledAndJammed; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -51,6 +51,12 @@ cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated")); +static cl::opt RuntimeUnrollTripCountBudget( + "unroll-runtime-exit-value-budget", cl::Hidden, cl::init(4), + cl::desc( + "Control the maximal total instruction cost that we are willing to " + "emit to for trip count value in LoopUnrollRuntime (default = 4)")); + /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the @@ -543,13 +549,11 @@ /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. /// EpilExit: -bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, - bool AllowExpensiveTripCount, - bool UseEpilogRemainder, - bool UnrollRemainder, bool ForgetAllSCEV, - LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, - bool PreserveLCSSA, Loop **ResultLoop) { +bool llvm::UnrollRuntimeLoopRemainder( + Loop *L, unsigned Count, bool AllowExpensiveTripCount, + bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? 
dbgs() << "Using epilog remainder.\n" @@ -637,7 +641,8 @@ const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { + Expander.isHighCostExpansion(TripCountSC, L, RuntimeUnrollTripCountBudget, + TTI, PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } @@ -949,7 +954,7 @@ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV}, - LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA); + LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); } if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1262,10 +1262,13 @@ return true; } -int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, - TargetLibraryInfo *TLI, ScalarEvolution *SE, SCEVExpander &Rewriter, - DominatorTree *DT, ReplaceExitVal ReplaceExitValue, - SmallVector &DeadInsts) { +int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, + ScalarEvolution *SE, + const TargetTransformInfo *TTI, + SCEVExpander &Rewriter, DominatorTree *DT, + ReplaceExitVal ReplaceExitValue, + unsigned ReplaceExitValueBudget, + SmallVector &DeadInsts) { // Check a pre-condition. assert(L->isRecursivelyLCSSAForm(*DT, *LI) && "Indvars did not preserve LCSSA!"); @@ -1347,14 +1350,16 @@ // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. Avoid doing so unless we know we have a value which computes - // the ExitValue already. TODO: This should be merged into SCEV - // expander to leverage its knowledge of existing expressions. 
- if (ReplaceExitValue != AlwaysRepl && - !isa(ExitValue) && !isa(ExitValue) && - hasHardUserWithinLoop(L, Inst)) + // the ExitValue already, unless the cost of the expansion is deemed + // to not be high. TODO: This should be merged into SCEV expander + // to leverage its knowledge of existing expressions. + bool HighCost = Rewriter.isHighCostExpansion( + ExitValue, L, ReplaceExitValueBudget, TTI, Inst); + if (ReplaceExitValue != AlwaysRepl && !isa(ExitValue) && + !isa(ExitValue) && hasHardUserWithinLoop(L, Inst) && + HighCost) continue; - bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -44,6 +45,12 @@ "Number of IV signed remainder operations converted to unsigned remainder"); STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); +static cl::opt ReplaceIWUserWithLoopInvariantBudget( + "replace-iv-user-with-loop-invariant-budget", cl::Hidden, cl::init(4), + cl::desc("Control the maximal total instruction cost that we are willing " + "to emit to replace the IW user in SimplifyIndvar with " + "loop-invariant (default = 4)")); + namespace { /// This is a utility for simplifying induction variables /// based on ScalarEvolution. 
It is the primary instrument of the @@ -54,6 +61,7 @@ LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + const TargetTransformInfo *TTI; SCEVExpander &Rewriter; SmallVectorImpl &DeadInsts; @@ -61,10 +69,11 @@ public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SCEVExpander &Rewriter, + LoopInfo *LI, const TargetTransformInfo *TTI, + SCEVExpander &Rewriter, SmallVectorImpl &Dead) - : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead), - Changed(false) { + : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter), + DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -667,7 +676,8 @@ return false; // Do not generate something ridiculous even if S is loop invariant. - if (Rewriter.isHighCostExpansion(S, L, I)) + if (Rewriter.isHighCostExpansion(S, L, ReplaceIWUserWithLoopInvariantBudget, + TTI, I)) return false; auto *IP = GetLoopInvariantInsertPosition(L, I); @@ -931,10 +941,11 @@ /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead, + LoopInfo *LI, const TargetTransformInfo *TTI, + SmallVectorImpl &Dead, SCEVExpander &Rewriter, IVVisitor *V) { - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter, - Dead); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI, + Rewriter, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } @@ -942,14 +953,16 @@ /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. 
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead) { + LoopInfo *LI, const TargetTransformInfo *TTI, + SmallVectorImpl &Dead) { SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead, Rewriter); + Changed |= + simplifyUsersOfIV(cast(I), SE, DT, LI, TTI, Dead, Rewriter); } return Changed; } diff --git a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll --- a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll +++ b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll @@ -35,8 +35,8 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]]) +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186 +; CHECK-NEXT: tail call void @func(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -69,8 +69,8 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: ret i32 [[ADD_LCSSA]] +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186 +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: br label %for.body @@ -101,8 +101,8 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]]) +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186 +; CHECK-NEXT: tail call void @func(i32 
[[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -141,8 +141,8 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[ADD_LCSSA]], 123 +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186 +; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[TMP0]], 123 ; CHECK-NEXT: tail call void @func(i32 [[SOFT_USE]]) ; CHECK-NEXT: ret void ; @@ -178,8 +178,8 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]]) +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186 +; CHECK-NEXT: tail call void @func(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -215,8 +215,8 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]]) +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186 +; CHECK-NEXT: tail call void @func(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll --- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll +++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll @@ -8,7 +8,10 @@ define void @postincConstIV(i8* %base, i32 %limit) nounwind { ; CHECK-LABEL: @postincConstIV( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LIMIT:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LIMIT]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 +; 
CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] @@ -19,8 +22,8 @@ ; CHECK-NEXT: store i8 0, i8* [[POSTADR]] ; CHECK-NEXT: [[POSTADRNSW:%.*]] = getelementptr inbounds i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store i8 0, i8* [[POSTADRNSW]] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV]] -; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: return: @@ -113,7 +116,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LIMITDEC:%.*]] = add i32 [[LIMIT:%.*]], -1 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[LIMITDEC]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[LIMIT]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[LIMIT]], i32 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[OUTERLOOP:%.*]] ; CHECK: outerloop: ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[OUTERMERGE:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -135,11 +140,10 @@ ; CHECK-NEXT: store i8 0, i8* [[ADR2]] ; CHECK-NEXT: [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store i8 0, i8* [[ADR3]] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]] +; CHECK-NEXT: [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]] ; CHECK: innerexit: -; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ] -; CHECK-NEXT: 
[[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i32 ; CHECK-NEXT: br label [[OUTERMERGE]] ; CHECK: outermerge: ; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ] @@ -149,8 +153,8 @@ ; CHECK-NEXT: [[ADR5:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[OFS5]] ; CHECK-NEXT: store i8 0, i8* [[ADR5]] ; CHECK-NEXT: [[INDVARS_IV_NEXT2]] = add nuw nsw i64 [[INDVARS_IV1]], 1 -; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT2]], [[TMP1]] -; CHECK-NEXT: br i1 [[TMP47]], label [[OUTERLOOP]], label [[RETURN:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT2]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[OUTERLOOP]], label [[RETURN:%.*]] ; CHECK: return: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll @@ -529,16 +529,19 @@ ; CHECK-NEXT: [[ENTRY_COND:%.*]] = and i1 [[ENTRY_COND_0]], [[ENTRY_COND_1]] ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LEN]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LEN]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -5 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_INC:%.*]], [[BE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_INC:%.*]], [[BE:%.*]] ], [ -6, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @side_effect() -; CHECK-NEXT: [[IV_2_INC]] = add nuw i32 [[IV_2]], 1 +; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], 1 ; CHECK-NEXT: br i1 true, label [[BE]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: be: ; CHECK-NEXT: call void 
@side_effect() -; CHECK-NEXT: [[BE_COND:%.*]] = icmp slt i32 [[IV_2]], [[LEN]] -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LEAVE_LOOPEXIT]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_INC]], [[TMP1]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE_LOOPEXIT]] ; CHECK: leave.loopexit: ; CHECK-NEXT: br label [[LEAVE]] ; CHECK: leave: @@ -685,6 +688,8 @@ ; CHECK-NEXT: [[LENGTH_IS_NONZERO:%.*]] = icmp ne i32 [[LENGTH]], 0 ; CHECK-NEXT: br i1 [[LENGTH_IS_NONZERO]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[LENGTH]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LENGTH]], i32 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_INC:%.*]], [[BE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] @@ -693,8 +698,8 @@ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[BE]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: be: ; CHECK-NEXT: call void @side_effect() -; CHECK-NEXT: [[BE_COND:%.*]] = icmp slt i32 [[IV_INC]], [[LENGTH]] -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LEAVE_LOOPEXIT]] +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_INC]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[LEAVE_LOOPEXIT]] ; CHECK: leave.loopexit: ; CHECK-NEXT: br label [[LEAVE]] ; CHECK: leave: diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-trunc.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-trunc.ll @@ -36,13 +36,16 @@ ; ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: br label 
[[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] -; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -63,13 +66,16 @@ ; ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 2147483646 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 2147483646 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] -; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -113,13 +119,16 @@ ; ; CHECK-LABEL: @test_04( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], -2147483647 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 -2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483647, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] -; CHECK-NEXT: br i1 [[TMP0]], label 
[[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -243,13 +252,16 @@ define void @test_02_unsigned(i32 %n) { ; CHECK-LABEL: @test_02_unsigned( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], -2 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 -2 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[UMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] -; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -318,13 +330,16 @@ define void @test_05_unsigned(i32 %n) { ; CHECK-LABEL: @test_05_unsigned( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] -; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], 
[[TMP1]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -366,14 +381,18 @@ define void @test_07(i32* %p, i32 %n) { ; CHECK-LABEL: @test_07( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-NEXT: store i32 [[NARROW_IV]], i32* [[P:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -451,15 +470,17 @@ define void @test_10(i32 %n) { ; CHECK-LABEL: @test_10( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 100 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90 +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMIN]], -99 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[IV]], [[SEXT]] -; CHECK-NEXT: [[NEGCMP:%.*]] = icmp slt i64 [[IV]], -10 -; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[NEGCMP]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: 
[[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[TMP3]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -530,13 +551,15 @@ ; CHECK-LABEL: @test_12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], !range !0 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV_NEXT]], [[ZEXT]] -; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/full_widening.ll b/llvm/test/Transforms/IndVarSimplify/full_widening.ll --- a/llvm/test/Transforms/IndVarSimplify/full_widening.ll +++ b/llvm/test/Transforms/IndVarSimplify/full_widening.ll @@ -7,7 +7,9 @@ define i32 @test_01(double* %p, double %x, i32* %np, i32* %mp, i32 %k) { ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[K:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[K:%.*]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[K]], i32 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_WIDE:%.*]] = phi i64 [ [[CANONICAL_IV_NEXT_I:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] @@ -17,8 +19,8 @@ ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], [[LOAD]] ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds double, double* [[P]], i64 [[IV_WIDE]] ; CHECK-NEXT: 
store atomic double [[MUL]], double* [[GEP2]] unordered, align 8 -; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i64 [[CANONICAL_IV_NEXT_I]], [[TMP0]] -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[CANONICAL_IV_NEXT_I]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll --- a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll +++ b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll @@ -122,15 +122,17 @@ ; CHECK-NEXT: [[ENTRY_COND:%.*]] = icmp ne i32 [[LIM:%.*]], 0 ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LIM]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[LIM]], 2 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[LIM]], i32 2 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: call void @dummy.i64(i64 [[TMP1]]) -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LEAVE_LOOPEXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: leave.loopexit: ; CHECK-NEXT: br label [[LEAVE]] ; CHECK: leave: @@ -165,7 +167,9 @@ ; CHECK-NEXT: [[BC0:%.*]] = bitcast i32* [[LINED:%.*]] to i8* ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[HSIZE:%.*]] 
to i64 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[NSTEPS:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[NSTEPS:%.*]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP2]], i32 [[NSTEPS]], i32 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT11:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -200,8 +204,8 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT8]], [[TMP2]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-NEXT: [[EXITCOND12:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT8]], [[WIDE_TRIP_COUNT11]] +; CHECK-NEXT: br i1 [[EXITCOND12]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll --- a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll @@ -128,18 +128,18 @@ define void @compound_early_exit(i32 %n, i32 %m) { ; CHECK-LABEL: @compound_early_exit( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[M:%.*]], [[N:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[M]], i32 [[N]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] -; CHECK-NEXT: [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i1 [[EARLYCND]], [[EARLYCND2]] -; CHECK-NEXT: br i1 [[AND]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[UMIN]] +; CHECK-NEXT: br i1 [[EXITCOND]], label 
[[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: store volatile i32 [[IV]], i32* @A -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll --- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -187,13 +187,15 @@ ; ; CHECK-LABEL: @unguardedloop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[IROW:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[IROW:%.*]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[IROW]], i32 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[CMP196:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP196]], label [[LOOP]], label [[RETURN:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN:%.*]] ; CHECK: return: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll b/llvm/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll --- a/llvm/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll @@ -311,12 +311,15 @@ define void @test3_neg(i64 %start) { ; CHECK-LABEL: @test3_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], -1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 -1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP1]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -336,16 +339,19 @@ define void @test4_neg(i64 %start) { ; CHECK-LABEL: @test4_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25 ; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END]], label [[LOOP]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 
[[TMP1]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll --- a/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll +++ b/llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll @@ -26,8 +26,7 @@ ; CHECK-NEXT: [[TMP23:%.*]] = icmp slt i32 [[TMP22]], [[TMP14]] ; CHECK-NEXT: br i1 [[TMP23]], label [[NOT_ZERO11]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: -; CHECK-NEXT: [[TMP22_LCSSA:%.*]] = phi i32 [ [[TMP22]], [[NOT_ZERO11]] ] -; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[TMP22_LCSSA]], [[LENGTH_I]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[TMP14]], [[LENGTH_I]] ; CHECK-NEXT: br i1 [[TMP24]], label [[NOT_ZERO11_POSTLOOP]], label [[LEAVE:%.*]] ; CHECK: leave: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/IndVarSimplify/pr28705.ll b/llvm/test/Transforms/IndVarSimplify/pr28705.ll --- a/llvm/test/Transforms/IndVarSimplify/pr28705.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr28705.ll @@ -16,14 +16,14 @@ ; CHECK: for.body650.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY650:%.*]] ; CHECK: loopexit: -; CHECK-NEXT: [[INC_I_I_LCSSA:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY650]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[DOTSROA_SPECULATED]], 1 ; CHECK-NEXT: br label [[XZ_EXIT]] ; CHECK: XZ.exit: -; CHECK-NEXT: [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC_I_I_LCSSA]], [[LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP0]], [[LOOPEXIT:%.*]] ] ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: for.body650: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_BODY650_LR_PH]] ], [ [[INC655:%.*]], [[FOR_BODY650]] ] -; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I]], [[FOR_BODY650]] ] +; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ 
[[INC_I_I:%.*]], [[FOR_BODY650]] ] ; CHECK-NEXT: [[ARRAYIDX_I_I1105:%.*]] = getelementptr inbounds i8, i8* [[REF_I1174:%.*]], i32 [[IV2]] ; CHECK-NEXT: store i8 7, i8* [[ARRAYIDX_I_I1105]], align 1 ; CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[IV2]], 1 diff --git a/llvm/test/Transforms/IndVarSimplify/pr39673.ll b/llvm/test/Transforms/IndVarSimplify/pr39673.ll --- a/llvm/test/Transforms/IndVarSimplify/pr39673.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr39673.ll @@ -72,8 +72,8 @@ ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2 ; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]] ; CHECK: loop2.end: -; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ] -; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]] +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG2]], 2 +; CHECK-NEXT: ret i16 [[TMP0]] ; entry: br label %loop1 @@ -121,8 +121,8 @@ ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2 ; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]] ; CHECK: loop2.end: -; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ] -; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]] +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[DUMMY]], 2 +; CHECK-NEXT: ret i16 [[TMP0]] ; entry: br label %loop2.preheader @@ -166,8 +166,8 @@ ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2 ; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]] ; CHECK: loop2.end: -; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ] -; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]] +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 2 +; CHECK-NEXT: ret i16 [[TMP1]] ; entry: br label %loop1 diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -24,30 +24,33 @@ ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @ptr, align 8 ; 
CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @e, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP4]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV]], [[TMP3]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_LOOPEXIT_CRIT_EDGE:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_LOOPEXIT_CRIT_EDGE:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV]] = phi i64 [ [[INDVARS_IV_NEXT]], [[FOR_COND:%.*]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_COND]] ; CHECK: if.then: ; CHECK-NEXT: [[I_05_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[I_05_LCSSA_WIDE]] to i32 -; CHECK-NEXT: store i32 [[TMP5]], i32* @idx, align 4 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[I_05_LCSSA_WIDE]] to i32 +; CHECK-NEXT: store i32 [[TMP6]], i32* @idx, align 4 ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.cond.for.end.loopexit_crit_edge: ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @idx, align 
4 -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @idx, align 4 +; CHECK-NEXT: ret i32 [[TMP7]] ; entry: store i32 -1, i32* @idx, align 4 @@ -96,7 +99,8 @@ ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32 ; CHECK-NEXT: br i1 undef, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[CONV]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[CONV]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[CONV]], i32 1 ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.preheader: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] @@ -107,8 +111,8 @@ ; CHECK-NEXT: br label [[FOR_INC13_US]] ; CHECK: for.inc13.us: ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 4 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]] +; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 4 +; CHECK-NEXT: br i1 [[EXITCOND4]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]] ; CHECK: for.body4.us: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[A:%.*]], i64 [[INDVARS_IV2]], i64 [[INDVARS_IV]] @@ -118,9 +122,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX8_US]], align 1 ; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX6_US]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_INC13_US_LOOPEXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT:%.*]] +; 
CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY4_US]], label [[FOR_INC13_US_LOOPEXIT:%.*]] ; CHECK: for.body4.lr.ph.us: +; CHECK-NEXT: [[WIDE_TRIP_COUNT]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_BODY4_US]] ; CHECK: for.cond1.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_INC13:%.*]], label [[FOR_INC13]] @@ -180,13 +185,15 @@ define i32 @test3(i32* %a, i32 %b) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[B:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[B]], i32 0 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 @@ -300,17 +307,20 @@ define i32 @test6(i32* %a, i32 %b) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[B:%.*]], -1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[B]], i32 -1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: 
[[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[INDVARS_IV]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: @@ -342,7 +352,10 @@ ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B]], -1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[B]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -351,11 +364,11 @@ ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp 
sle i64 [[INDVARS_IV]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[FOR_END]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0]], [[FOR_BODY]] ], [ [[SUM_0]], [[FOR_COND]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] @@ -444,13 +457,15 @@ ; CHECK-NEXT: br i1 [[E]], label [[FOR_COND_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: for.cond.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INIT]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[INIT]], [[B:%.*]] +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[INIT]], i32 [[B]] +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_COND_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -26,29 +26,119 @@ ; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float ; 
AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 +; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[N_VEC]], -32 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 5 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 96 +; AUTO_VEC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; AUTO_VEC: vector.ph.new: +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = sub nsw i64 [[TMP3]], [[XTRAITER]] ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: -; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD5:%.*]] = fadd fast <8 x float> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD6:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], <8 x float>* [[TMP2]], align 4 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], <8 x float>* [[TMP4]], align 4 -; 
AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 16 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5]], <8 x float>* [[TMP6]], align 4 -; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 24 +; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], <8 x float>* [[TMP6]], align 4 +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 8 ; AUTO_VEC-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6]], <8 x float>* [[TMP8]], align 4 -; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; AUTO_VEC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], <8 x float>* [[TMP8]], align 4 +; AUTO_VEC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 16 +; AUTO_VEC-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5]], <8 x float>* [[TMP10]], align 4 +; AUTO_VEC-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 24 +; AUTO_VEC-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6]], <8 x float>* [[TMP12]], align 4 +; AUTO_VEC-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 32 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD5_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD6_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT]] +; AUTO_VEC-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_NEXT]], <8 x float>* [[TMP14]], align 4 +; AUTO_VEC-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 8 +; AUTO_VEC-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP15]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_1]], <8 x float>* [[TMP16]], align 4 +; AUTO_VEC-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 16 +; AUTO_VEC-NEXT: [[TMP18:%.*]] = bitcast float* [[TMP17]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_1]], <8 x float>* [[TMP18]], align 4 +; AUTO_VEC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 24 +; AUTO_VEC-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_1]], <8 x float>* [[TMP20]], align 4 +; AUTO_VEC-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 64 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD5_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD6_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_1]] +; AUTO_VEC-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP21]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_NEXT_1]], <8 x float>* [[TMP22]], align 4 +; AUTO_VEC-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 8 +; AUTO_VEC-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_2]], <8 x float>* [[TMP24]], align 4 +; AUTO_VEC-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 16 +; AUTO_VEC-NEXT: [[TMP26:%.*]] = bitcast float* 
[[TMP25]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_2]], <8 x float>* [[TMP26]], align 4 +; AUTO_VEC-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 24 +; AUTO_VEC-NEXT: [[TMP28:%.*]] = bitcast float* [[TMP27]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_2]], <8 x float>* [[TMP28]], align 4 +; AUTO_VEC-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 96 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD5_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD6_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_2]] +; AUTO_VEC-NEXT: [[TMP30:%.*]] = bitcast float* [[TMP29]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_NEXT_2]], <8 x float>* [[TMP30]], align 4 +; AUTO_VEC-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 8 +; AUTO_VEC-NEXT: [[TMP32:%.*]] = bitcast float* [[TMP31]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_3]], <8 x float>* [[TMP32]], align 4 +; AUTO_VEC-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 16 +; AUTO_VEC-NEXT: [[TMP34:%.*]] = bitcast float* [[TMP33]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_3]], <8 x float>* [[TMP34]], align 4 +; AUTO_VEC-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 24 +; AUTO_VEC-NEXT: [[TMP36:%.*]] = bitcast float* [[TMP35]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_3]], <8 x float>* [[TMP36]], align 4 +; AUTO_VEC-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 128 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_3]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4 +; AUTO_VEC-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0 +; 
AUTO_VEC-NEXT: br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop !0 +; AUTO_VEC: middle.block.unr-lcssa: +; AUTO_VEC-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[VEC_IND_UNR:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_3]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; AUTO_VEC-NEXT: br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]] +; AUTO_VEC: vector.body.epil: +; AUTO_VEC-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[VEC_IND_EPIL:%.*]] = phi <8 x float> [ [[VEC_IND_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[VEC_IND_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[STEP_ADD_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[STEP_ADD5_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[STEP_ADD6_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_EPIL]] +; AUTO_VEC-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_EPIL]], <8 x float>* [[TMP38]], align 4 +; AUTO_VEC-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 8 +; AUTO_VEC-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP39]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_EPIL]], <8 x float>* [[TMP40]], align 4 +; AUTO_VEC-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 16 +; AUTO_VEC-NEXT: [[TMP42:%.*]] = bitcast float* [[TMP41]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_EPIL]], <8 x float>* 
[[TMP42]], align 4 +; AUTO_VEC-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 24 +; AUTO_VEC-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP43]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_EPIL]], <8 x float>* [[TMP44]], align 4 +; AUTO_VEC-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 32 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_EPIL]] = fadd fast <8 x float> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop !2 ; AUTO_VEC: middle.block: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[ZEXT]] ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] @@ -59,8 +149,8 @@ ; AUTO_VEC-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[CONV1]] = fadd float [[X_06]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] -; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; AUTO_VEC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] +; AUTO_VEC-NEXT: br i1 [[TMP45]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4 ; AUTO_VEC: for.end: ; AUTO_VEC-NEXT: ret void ; @@ -167,7 +257,7 @@ ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1 ; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 ; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !4 +; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !6 ; AUTO_VEC: for.end: ; AUTO_VEC-NEXT: ret void ; @@ -207,34 +297,124 @@ ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 
9223372036854775792 ; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to double ; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul fast double [[CAST_CRD]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = add nsw i64 [[N_VEC]], -16 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 4 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 3 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 48 +; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; AUTO_VEC: vector.ph.new: +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = sub nsw i64 [[TMP4]], [[XTRAITER]] ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: -; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD5:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD6:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], <4 x double>* [[TMP3]], align 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, double* [[TMP2]], i64 4 -; AUTO_VEC-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP4]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], <4 x double>* 
[[TMP5]], align 8 -; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, double* [[TMP2]], i64 8 +; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5]], <4 x double>* [[TMP7]], align 8 -; AUTO_VEC-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP2]], i64 12 +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], <4 x double>* [[TMP7]], align 8 +; AUTO_VEC-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP6]], i64 4 ; AUTO_VEC-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6]], <4 x double>* [[TMP9]], align 8 -; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], <4 x double>* [[TMP9]], align 8 +; AUTO_VEC-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP6]], i64 8 +; AUTO_VEC-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5]], <4 x double>* [[TMP11]], align 8 +; AUTO_VEC-NEXT: [[TMP12:%.*]] = getelementptr double, double* [[TMP6]], i64 12 +; AUTO_VEC-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP12]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6]], <4 x double>* [[TMP13]], align 8 +; AUTO_VEC-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD5_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD6_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: 
[[TMP14:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT]] +; AUTO_VEC-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT]], <4 x double>* [[TMP15]], align 8 +; AUTO_VEC-NEXT: [[TMP16:%.*]] = getelementptr double, double* [[TMP14]], i64 4 +; AUTO_VEC-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_1]], <4 x double>* [[TMP17]], align 8 +; AUTO_VEC-NEXT: [[TMP18:%.*]] = getelementptr double, double* [[TMP14]], i64 8 +; AUTO_VEC-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_1]], <4 x double>* [[TMP19]], align 8 +; AUTO_VEC-NEXT: [[TMP20:%.*]] = getelementptr double, double* [[TMP14]], i64 12 +; AUTO_VEC-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_1]], <4 x double>* [[TMP21]], align 8 +; AUTO_VEC-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD5_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD6_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[TMP22:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_1]] +; AUTO_VEC-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_1]], <4 x double>* [[TMP23]], align 8 +; AUTO_VEC-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[TMP22]], i64 4 +; AUTO_VEC-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], <4 x double>* [[TMP25]], align 8 +; AUTO_VEC-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[TMP22]], i64 8 +; AUTO_VEC-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x 
double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_2]], <4 x double>* [[TMP27]], align 8 +; AUTO_VEC-NEXT: [[TMP28:%.*]] = getelementptr double, double* [[TMP22]], i64 12 +; AUTO_VEC-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_2]], <4 x double>* [[TMP29]], align 8 +; AUTO_VEC-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD5_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD6_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[TMP30:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_2]] +; AUTO_VEC-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_2]], <4 x double>* [[TMP31]], align 8 +; AUTO_VEC-NEXT: [[TMP32:%.*]] = getelementptr double, double* [[TMP30]], i64 4 +; AUTO_VEC-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], <4 x double>* [[TMP33]], align 8 +; AUTO_VEC-NEXT: [[TMP34:%.*]] = getelementptr double, double* [[TMP30]], i64 8 +; AUTO_VEC-NEXT: [[TMP35:%.*]] = bitcast double* [[TMP34]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_3]], <4 x double>* [[TMP35]], align 8 +; AUTO_VEC-NEXT: [[TMP36:%.*]] = getelementptr double, double* [[TMP30]], i64 12 +; AUTO_VEC-NEXT: [[TMP37:%.*]] = bitcast double* [[TMP36]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_3]], <4 x double>* [[TMP37]], align 8 +; AUTO_VEC-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 64 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_3]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4 +; AUTO_VEC-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0 +; AUTO_VEC-NEXT: br i1 
[[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop !7 +; AUTO_VEC: middle.block.unr-lcssa: +; AUTO_VEC-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[VEC_IND_UNR:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_3]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; AUTO_VEC-NEXT: br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]] +; AUTO_VEC: vector.body.epil: +; AUTO_VEC-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[VEC_IND_EPIL:%.*]] = phi <4 x double> [ [[VEC_IND_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[VEC_IND_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[STEP_ADD_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[STEP_ADD5_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[STEP_ADD6_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[TMP38:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_EPIL]] +; AUTO_VEC-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_EPIL]], <4 x double>* [[TMP39]], align 8 +; AUTO_VEC-NEXT: [[TMP40:%.*]] = getelementptr double, double* [[TMP38]], i64 4 +; AUTO_VEC-NEXT: [[TMP41:%.*]] = bitcast double* [[TMP40]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_EPIL]], <4 x double>* [[TMP41]], align 8 +; AUTO_VEC-NEXT: [[TMP42:%.*]] = getelementptr double, double* [[TMP38]], i64 8 +; AUTO_VEC-NEXT: [[TMP43:%.*]] = bitcast double* [[TMP42]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_EPIL]], <4 x double>* [[TMP43]], align 8 +; 
AUTO_VEC-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[TMP38]], i64 12 +; AUTO_VEC-NEXT: [[TMP45:%.*]] = bitcast double* [[TMP44]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_EPIL]], <4 x double>* [[TMP45]], align 8 +; AUTO_VEC-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 16 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT_EPIL]] = fadd fast <4 x double> [[VEC_IND_EPIL]], +; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop !8 ; AUTO_VEC: middle.block: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] -; AUTO_VEC-NEXT: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -1 -; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP11]] to double -; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul fast double [[CAST_CMO]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP46:%.*]] = add nsw i64 [[N_VEC]], -1 +; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP46]] to double +; AUTO_VEC-NEXT: [[TMP47:%.*]] = fmul fast double [[CAST_CMO]], 3.000000e+00 ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] @@ -244,9 +424,9 @@ ; AUTO_VEC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; AUTO_VEC-NEXT: [[J_NEXT]] = fadd fast double [[J]], 3.000000e+00 ; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; AUTO_VEC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !7 +; AUTO_VEC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !9 ; AUTO_VEC: for.end: -; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP47]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: ret 
double [[J_LCSSA]] ; entry: @@ -270,18 +450,74 @@ define double @external_use_without_fast_math(double* %a, i64 %n) { ; AUTO_VEC-LABEL: @external_use_without_fast_math( ; AUTO_VEC-NEXT: entry: +; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 +; AUTO_VEC-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 +; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], -1 +; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[SMAX]], 7 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7 +; AUTO_VEC-NEXT: br i1 [[TMP2]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] +; AUTO_VEC: entry.new: +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = sub nsw i64 [[SMAX]], [[XTRAITER]] ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] ; AUTO_VEC: for.body: -; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[TMP0:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[I]] ; AUTO_VEC-NEXT: store double [[J]], double* [[TMP0]], align 8 -; AUTO_VEC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; AUTO_VEC-NEXT: [[J_NEXT]] = fadd double [[J]], 3.000000e+00 -; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N:%.*]] -; AUTO_VEC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; AUTO_VEC-NEXT: [[I_NEXT:%.*]] = or i64 [[I]], 1 +; AUTO_VEC-NEXT: [[J_NEXT:%.*]] = fadd double [[J]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_1:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT]] +; AUTO_VEC-NEXT: store double [[J_NEXT]], double* [[TMP0_1]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_1:%.*]] = or i64 
[[I]], 2 +; AUTO_VEC-NEXT: [[J_NEXT_1:%.*]] = fadd double [[J_NEXT]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_2:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_1]] +; AUTO_VEC-NEXT: store double [[J_NEXT_1]], double* [[TMP0_2]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_2:%.*]] = or i64 [[I]], 3 +; AUTO_VEC-NEXT: [[J_NEXT_2:%.*]] = fadd double [[J_NEXT_1]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_3:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_2]] +; AUTO_VEC-NEXT: store double [[J_NEXT_2]], double* [[TMP0_3]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_3:%.*]] = or i64 [[I]], 4 +; AUTO_VEC-NEXT: [[J_NEXT_3:%.*]] = fadd double [[J_NEXT_2]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_4:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_3]] +; AUTO_VEC-NEXT: store double [[J_NEXT_3]], double* [[TMP0_4]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_4:%.*]] = or i64 [[I]], 5 +; AUTO_VEC-NEXT: [[J_NEXT_4:%.*]] = fadd double [[J_NEXT_3]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_5:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_4]] +; AUTO_VEC-NEXT: store double [[J_NEXT_4]], double* [[TMP0_5]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_5:%.*]] = or i64 [[I]], 6 +; AUTO_VEC-NEXT: [[J_NEXT_5:%.*]] = fadd double [[J_NEXT_4]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_6:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_5]] +; AUTO_VEC-NEXT: store double [[J_NEXT_5]], double* [[TMP0_6]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_6:%.*]] = or i64 [[I]], 7 +; AUTO_VEC-NEXT: [[J_NEXT_6:%.*]] = fadd double [[J_NEXT_5]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP0_7:%.*]] = getelementptr double, double* [[A]], i64 [[I_NEXT_6]] +; AUTO_VEC-NEXT: store double [[J_NEXT_6]], double* [[TMP0_7]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_7]] = add nuw nsw i64 [[I]], 8 +; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J_NEXT_6]], 3.000000e+00 +; AUTO_VEC-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 +; AUTO_VEC-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 +; AUTO_VEC-NEXT: br i1 
[[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY]] +; AUTO_VEC: for.end.unr-lcssa: +; AUTO_VEC-NEXT: [[J_LCSSA_PH:%.*]] = phi double [ undef, [[ENTRY:%.*]] ], [ [[J_NEXT_6]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[I_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[I_NEXT_7]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[J_UNR:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[J_NEXT_7]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; AUTO_VEC-NEXT: br i1 [[LCMP_MOD]], label [[FOR_END:%.*]], label [[FOR_BODY_EPIL:%.*]] +; AUTO_VEC: for.body.epil: +; AUTO_VEC-NEXT: [[I_EPIL:%.*]] = phi i64 [ [[I_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[I_UNR]], [[FOR_END_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[J_EPIL:%.*]] = phi double [ [[J_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[J_UNR]], [[FOR_END_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_END_UNR_LCSSA]] ] +; AUTO_VEC-NEXT: [[TMP0_EPIL:%.*]] = getelementptr double, double* [[A]], i64 [[I_EPIL]] +; AUTO_VEC-NEXT: store double [[J_EPIL]], double* [[TMP0_EPIL]], align 8 +; AUTO_VEC-NEXT: [[I_NEXT_EPIL]] = add nuw nsw i64 [[I_EPIL]], 1 +; AUTO_VEC-NEXT: [[J_NEXT_EPIL]] = fadd double [[J_EPIL]], 3.000000e+00 +; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !10 ; AUTO_VEC: for.end: -; AUTO_VEC-NEXT: ret double [[J]] +; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[J_EPIL]], [[FOR_BODY_EPIL]] ] +; AUTO_VEC-NEXT: ret double [[J_LCSSA]] ; entry: br label %for.body diff --git a/llvm/unittests/Transforms/Utils/UnrollLoopTest.cpp b/llvm/unittests/Transforms/Utils/UnrollLoopTest.cpp --- a/llvm/unittests/Transforms/Utils/UnrollLoopTest.cpp +++ b/llvm/unittests/Transforms/Utils/UnrollLoopTest.cpp @@ -70,7 
+70,8 @@ bool PreserveLCSSA = L->isRecursivelyLCSSAForm(DT,LI); - bool ret = UnrollRuntimeLoopRemainder(L, 4, true, false, false, false, &LI, - &SE, &DT, &AC, PreserveLCSSA); + bool ret = + UnrollRuntimeLoopRemainder(L, 4, true, false, false, false, &LI, &SE, &DT, + &AC, /*TTI=*/nullptr, PreserveLCSSA); EXPECT_FALSE(ret); }