Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -520,9 +520,14 @@ const SCEV *Exact; const SCEV *Max; + /// A predicate union guard for this ExitLimit. The result is only + /// valid if this predicate evaluates to 'true' at run-time. + SCEVUnionPredicate Pred; + /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {} - ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) { + ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P) + : Exact(E), Max(M), Pred(P) { assert((isa(Exact) || !isa(Max)) && "Exact is not allowed to be less precise than Max"); @@ -542,6 +547,7 @@ AssertingVH ExitingBlock; const SCEV *ExactNotTaken; PointerIntPair NextExit; + SCEVUnionPredicate Pred; ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} @@ -578,7 +584,8 @@ /// Initialize BackedgeTakenInfo from a list of exact exit counts. BackedgeTakenInfo( SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount); + SmallVectorImpl &ExitPreds, bool Complete, + const SCEV *MaxCount); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. @@ -587,11 +594,21 @@ } /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the - /// number of times the loop header can be guaranteed to execute, minus - /// one. + /// count of the loop if it is known and always correct (independent + /// of any assumptions that should be checked at run-time), or + /// SCEVCouldNotCompute otherwise. This is the number of times the + /// loop header can be guaranteed to execute, minus one. const SCEV *getExact(ScalarEvolution *SE) const; + /// getGuardedExact - Return an expression indicating the exact + /// backedge-taken count of the loop if it is known, or + /// SCEVCouldNotCompute otherwise. Returns the SCEV predicates that + /// need to be checked at run-time in order for this answer to be valid + /// in \p Predicates. This is the number of times the loop header can be + /// guaranteed to execute, minus one. + const SCEV *getGuardedExact(ScalarEvolution *SE, + SCEVUnionPredicate &Predicates) const; + /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via /// this block before this number of iterations, but may exit via another @@ -727,7 +744,8 @@ ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool UseAssumptions = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case @@ -765,7 +783,8 @@ /// Return the number of times an exit condition comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. - ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); + ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, + bool Force = false); /// Return the number of times an exit condition checking the specified /// value for nonzero will execute. If not computable, return @@ -776,9 +795,12 @@ /// less-than comparison will execute. If not computable, return /// CouldNotCompute. isSigned specifies whether the less-than is signed. ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + const Loop *L, bool isSigned, bool IsSubExpr, + bool Force = false); + ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + const Loop *L, bool isSigned, bool IsSubExpr, + bool Force = false); /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if @@ -1153,6 +1175,9 @@ /// const SCEV *getBackedgeTakenCount(const Loop *L); + const SCEV *getGuardedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Predicates); + /// Similar to getBackedgeTakenCount, except return the least SCEV value /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); @@ -1483,6 +1508,8 @@ /// by ScalarEvolution is guaranteed to be preserved, even when adding new /// predicates. const SCEV *getSCEV(Value *V); + /// Get the (predicated) backedge count for the analyzed loop. + const SCEV *getBackedgeTakenCount(); /// \brief Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); /// \brief Attempts to produce an AddRecExpr for V by adding additional @@ -1524,6 +1551,8 @@ /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. unsigned Generation; + /// The backedge taken count. + const SCEV *BackedgeCount; }; } Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -132,8 +132,7 @@ const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); - ScalarEvolution *SE = PSE.getSE(); - const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + const SCEV *Ex = PSE.getBackedgeTakenCount(); const SCEV *ScStart = AR->getStart(); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); @@ -1443,7 +1442,7 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(LoopAccessReport() << "could not determine number of loop iterations"); Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -4976,6 +4976,13 @@ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } +const SCEV * +ScalarEvolution::getGuardedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Preds) { + return getBackedgeTakenInfo(L).getGuardedExact(this, Preds); +} + + /// getBackedgeTakenCount - If the specified loop has a predictable /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute /// object. The backedge-taken count is the number of times the loop header @@ -5178,6 +5185,9 @@ assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + if (!ENT->Pred.isAlwaysTrue()) + return SE->getCouldNotCompute(); + if (!BECount) BECount = ENT->ExactNotTaken; else if (BECount != ENT->ExactNotTaken) @@ -5187,6 +5197,36 @@ return BECount; } +const SCEV *ScalarEvolution::BackedgeTakenInfo::getGuardedExact( + ScalarEvolution *SE, SCEVUnionPredicate &Preds) const { + // If any exits were not computable, the loop is not computable. + if (!ExitNotTaken.isCompleteList()) + return SE->getCouldNotCompute(); + + // We need exactly one computable exit. + if (!ExitNotTaken.ExitingBlock) + return SE->getCouldNotCompute(); + assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); + + const SCEV *BECount = nullptr; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr; + ENT = ENT->getNextExit()) { + + assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + + if (!BECount) { + BECount = ENT->ExactNotTaken; + } else if (BECount != ENT->ExactNotTaken) { + return SE->getCouldNotCompute(); + } + Preds.add(&(ENT->Pred)); + } + assert(BECount && "Invalid not taken count for loop exit"); + + return BECount; +} + /// getExact - Get the exact not taken count for this loop exit. const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, @@ -5194,7 +5234,7 @@ for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr; ENT = ENT->getNextExit()) { - if (ENT->ExitingBlock == ExitingBlock) + if (ENT->ExitingBlock == ExitingBlock && ENT->Pred.isAlwaysTrue()) return ENT->ExactNotTaken; } return SE->getCouldNotCompute(); @@ -5203,6 +5243,12 @@ /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr; + ENT = ENT->getNextExit()) { + if (!ENT->Pred.isAlwaysTrue()) + return SE->getCouldNotCompute(); + } + return Max ? Max : SE->getCouldNotCompute(); } @@ -5229,7 +5275,8 @@ /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + SmallVectorImpl &ExitPreds, bool Complete, + const SCEV *MaxCount) : Max(MaxCount) { if (!Complete) ExitNotTaken.setIncomplete(); @@ -5239,7 +5286,12 @@ ExitNotTaken.ExitingBlock = ExitCounts[0].first; ExitNotTaken.ExactNotTaken = ExitCounts[0].second; - if (NumExits == 1) return; + assert(ExitCounts.size() == ExitPreds.size() && + "ExitCounts should have the same size as ExitPreds"); + ExitNotTaken.Pred.add(ExitPreds[0]); + + if (NumExits == 1) + return; // Handle the rare case of multiple computable exits. ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; @@ -5249,6 +5301,7 @@ PrevENT->setNextExit(ENT); ENT->ExitingBlock = ExitCounts[i].first; ENT->ExactNotTaken = ExitCounts[i].second; + ENT->Pred.add(ExitPreds[i]); } } @@ -5267,6 +5320,7 @@ L->getExitingBlocks(ExitingBlocks); SmallVector, 4> ExitCounts; + SmallVector ExitCountPreds; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; @@ -5274,6 +5328,7 @@ // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. + // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; ExitLimit EL = computeExitLimit(L, ExitBB); @@ -5284,8 +5339,10 @@ // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; - else + else { ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact)); + ExitCountPreds.push_back(&EL.Pred); + } // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: @@ -5315,7 +5372,8 @@ } const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); - return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); + return BackedgeTakenInfo(ExitCounts, ExitCountPreds, CouldComputeBECount, + MaxBECount); } ScalarEvolution::ExitLimit @@ -5445,6 +5503,9 @@ BECount = EL0.Exact; } + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact @@ -5453,7 +5514,7 @@ !isa(BECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, NP); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. @@ -5488,7 +5549,10 @@ BECount = EL0.Exact; } - return ExitLimit(BECount, MaxBECount); + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); + return ExitLimit(BECount, MaxBECount, NP); } } @@ -5519,7 +5583,8 @@ ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool Force) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -5576,7 +5641,8 @@ switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, + Force); if (EL.hasAnyInfo()) return EL; break; } @@ -5589,21 +5655,32 @@ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Cond == ICmpInst::ICMP_SLT; - ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, + Force); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Cond == ICmpInst::ICMP_SGT; - ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, + Force); if (EL.hasAnyInfo()) return EL; break; } default: break; } - return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + + ExitLimit EL = computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + + // Avoid recursion when using Force = true. + if (EL.hasAnyInfo() || Force) + return EL; + + // We could not prove what the exit limit is without making assumptions. + // Try to compute it using assumptions. + return computeExitLimitFromICmp(L, ExitCond, TBB, FBB, ControlsExit, true); } ScalarEvolution::ExitLimit @@ -5858,7 +5935,8 @@ unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); - return ExitLimit(getCouldNotCompute(), UpperBound); + SCEVUnionPredicate P; + return ExitLimit(getCouldNotCompute(), UpperBound, P); } return getCouldNotCompute(); @@ -6636,7 +6714,9 @@ /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, + bool Force) { + SCEVUnionPredicate P; // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -6645,6 +6725,14 @@ } const SCEVAddRecExpr *AddRec = dyn_cast(V); + if ((!AddRec) && Force) { + // Try to make this a chrec using runtime tests. + const SCEV *Expr = convertSCEVToAddRecWithPredicates(V, L, P); + AddRec = dyn_cast(Expr); + if (!AddRec) + return getCouldNotCompute(); + } + if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); @@ -6669,7 +6757,7 @@ // should not accept a root of 2. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) - return R1; // We found a quadratic root! + return ExitLimit(R1, R1, P); // We found a quadratic root! } } return getCouldNotCompute(); @@ -6726,7 +6814,7 @@ else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount); + return ExitLimit(Distance, MaxBECount, P); } // As a special case, handle the instance where Step is a positive power of @@ -6779,7 +6867,10 @@ auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); auto *WideTy = Distance->getType(); - return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + const SCEV *Limit = getZeroExtendExpr( + getTruncateExpr(ModuloResult, NarrowTy), + WideTy); + return ExitLimit(Limit, Limit, P); } } @@ -6791,13 +6882,15 @@ if (ControlsExit && AddRec->hasNoSelfWrap()) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact); + return ExitLimit(Exact, Exact, P); } // Then, try to solve the above equation provided that Start is constant. - if (const SCEVConstant *StartC = dyn_cast(Start)) - return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), - *this); + if (const SCEVConstant *StartC = dyn_cast(Start)) { + const SCEV *E = SolveLinEquationWithOverflow( + StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); + return ExitLimit(E, E, P); + } return getCouldNotCompute(); } @@ -8240,12 +8333,20 @@ ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool Force) { + SCEVUnionPredicate P; // We handle only IV < Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if ((!IV) && Force) { + // Try to make this a chrec using runtime tests. + const SCEV *Expr = convertSCEVToAddRecWithPredicates(LHS, L, P); + IV = dyn_cast(Expr); + if (!IV) + return getCouldNotCompute(); + } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8314,18 +8415,26 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } ScalarEvolution::ExitLimit ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool Force) { + SCEVUnionPredicate P; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if ((!IV) && Force) { + // Try to make this a chrec using runtime tests. + const SCEV *Expr = convertSCEVToAddRecWithPredicates(LHS, L, P); + IV = dyn_cast(Expr); + if (!IV) + return getCouldNotCompute(); + } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8396,7 +8505,7 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -9873,7 +9982,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) - : SE(SE), L(L), Generation(0) {} + : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); @@ -9894,6 +10003,15 @@ return NewSCEV; } +const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { + if (!BackedgeCount) { + SCEVUnionPredicate BackedgePred; + BackedgeCount = SE.getGuardedBackedgeTakenCount(&L, BackedgePred); + addPredicate(BackedgePred); + } + return BackedgeCount; +} + void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds.implies(&Pred)) return; @@ -9958,7 +10076,7 @@ PredicatedScalarEvolution:: PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), - Generation(Init.Generation) { + Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I) FlagsMap.insert(*I); } Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -1993,7 +1993,8 @@ assert(AR->isAffine() && "Cannot generate RT check for " "non-affine expression"); - const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop()); + SCEVUnionPredicate Pred; + const SCEV *ExitCount = SE.getGuardedBackedgeTakenCount(AR->getLoop(), Pred); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2646,7 +2646,7 @@ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); + const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -4115,7 +4115,7 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(VectorizationReport() << "could not determine number of loop iterations"); Index: test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -0,0 +1,166 @@ +; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s + +; The following tests contain loops for which SCEV cannot determine the backedge +; taken count. This is because the backedge taken condition is produced by an +; icmp with one of the sides being a loop varying non-AddRec expression. +; However, there is a possibility to normalize this to an AddRec expression +; using SCEV predicates. This allows us to compute a 'guarded' backedge count. +; The Loop Vectorizer is able to version to loop in order to use this guarded +; backedge count and vectorize more loops. + + +; CHECK-LABEL: test_sge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_uge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_uge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, i32 %Offset) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + + %indvars.ext = sext i16 %indvars.iv to i32 + %indvars.access = add i32 %Offset, %indvars.ext + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = add i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp uge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_ule +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_ule(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp ule i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_sle +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sle(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = sext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sle i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +}