Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -520,9 +520,14 @@ const SCEV *Exact; const SCEV *Max; + /// A predicate union guard for this ExitLimit. The result is only + /// valid if this predicate evaluates to 'true' at run-time. + SCEVUnionPredicate Pred; + /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {} - ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) { + ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P) + : Exact(E), Max(M), Pred(P) { assert((isa(Exact) || !isa(Max)) && "Exact is not allowed to be less precise than Max"); @@ -534,6 +539,11 @@ return !isa(Exact) || !isa(Max); } + + /// Test whether this ExitLimit contains all information. + bool hasFullInfo() const { + return !isa(Exact); + } }; /// Information about the number of times a particular loop exit may be @@ -542,6 +552,9 @@ AssertingVH ExitingBlock; const SCEV *ExactNotTaken; PointerIntPair NextExit; + /// The information is only valid if Pred evaluates to true, + /// otherwise it's all CouldNotCompute. + SCEVUnionPredicate Pred; ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} @@ -577,8 +590,9 @@ /// Initialize BackedgeTakenInfo from a list of exact exit counts. BackedgeTakenInfo( - SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount); + SmallVectorImpl> &ExitCounts, + SmallVectorImpl &ExitPreds, bool Complete, + const SCEV *MaxCount); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. @@ -586,11 +600,21 @@ return ExitNotTaken.ExitingBlock || !isa(Max); } + /// Test whether this BackedgeTakenInfo contains complete information. + bool hasFullInfo() const { + return ExitNotTaken.isCompleteList(); + } + /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the - /// number of times the loop header can be guaranteed to execute, minus - /// one. - const SCEV *getExact(ScalarEvolution *SE) const; + /// loop if it is known and always correct (independent of any + /// assumptions that should be checked at run-time), or + /// SCEVCouldNotCompute otherwise. This is the number of times the loop + /// header can be guaranteed to execute, minus one. If the SCEV predicate + /// associated with the answer can be different from AlwaysTrue, we must + /// add the Predicates argument. The SCEV predicate associated with the + /// answer will be added to Predicates. + const SCEV *getExact(ScalarEvolution *SE, + SCEVUnionPredicate *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via @@ -612,6 +636,9 @@ /// Cache the backedge-taken count of the loops for this function as they /// are computed. DenseMap BackedgeTakenCounts; + /// Cache the predicated backedge-taken count of the loops for this + /// function as they are computed. + DenseMap PredicatedBackedgeTakenCounts; /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid @@ -713,33 +740,49 @@ void forgetSymbolicName(Instruction *I, const SCEV *SymName); /// Return the BackedgeTakenInfo for the given loop, lazily computing new - /// values if the loop hasn't been analyzed yet. + /// values if the loop hasn't been analyzed yet. The returned result is + /// guaranteed not to be predicated. const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); + /// Similar to getBackedgeTakenInfo, but will add predicates as required + /// with the purpose of returning complete information (the exact backedge + /// taken count will be known). + const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); + /// Compute the number of times the specified loop will iterate. - BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L); + /// If IsGuarded is set, we will create new SCEV predicates as necessary in + /// order to return an exact answer. + BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L, bool IsGuarded); /// Compute the number of times the backedge of the specified loop will - /// execute if it exits via the specified block. - ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock); + /// execute if it exits via the specified block. If IsGuarded is set, + /// this call will try to use a minimal set of SCEV predicates in order to + /// return an exact answer. + ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool IsGuarded); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond, - /// TBB, and FBB. + /// TBB, and FBB. If IsGuarded is set, this call try to use a minimal set + /// of SCEV predicates in order to return an exact answer. ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool Guarded); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst - /// ExitCond, TBB, and FBB. + /// ExitCond, TBB, and FBB. If CreatePredicates is set, this call try to + /// use a minimal set of SCEV predicates in order to return an exact + /// answer. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool CreatePredicates); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case @@ -777,7 +820,10 @@ /// Return the number of times an exit condition comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. - ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); + /// If CreatePredicates is set, this call try to use a minimal set of SCEV + /// predicates in order to return an exact answer. + ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, + bool CreatePredicates = false); /// Return the number of times an exit condition checking the specified /// value for nonzero will execute. If not computable, return @@ -787,10 +833,15 @@ /// Return the number of times an exit condition containing the specified /// less-than comparison will execute. If not computable, return /// CouldNotCompute. isSigned specifies whether the less-than is signed. - ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + /// If CreatePredicates is set, this call try to use a minimal set of SCEV + /// predicates in order to return an exact answer. + ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, + bool isSigned, bool IsSubExpr, + bool CreatePredicates = false); + ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + const Loop *L, bool isSigned, bool IsSubExpr, + bool CreatePredicates = false); /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if @@ -1168,6 +1219,13 @@ /// const SCEV *getBackedgeTakenCount(const Loop *L); + /// Similar to getBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Predicates); + /// Similar to getBackedgeTakenCount, except return the least SCEV value /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); @@ -1489,6 +1547,8 @@ /// by ScalarEvolution is guaranteed to be preserved, even when adding new /// predicates. const SCEV *getSCEV(Value *V); + /// Get the (predicated) backedge count for the analyzed loop. + const SCEV *getBackedgeTakenCount(); /// \brief Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); /// \brief Attempts to produce an AddRecExpr for V by adding additional @@ -1530,6 +1590,8 @@ /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. unsigned Generation; + /// The backedge taken count. + const SCEV *BackedgeCount; }; } Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -132,8 +132,7 @@ const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); - ScalarEvolution *SE = PSE.getSE(); - const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + const SCEV *Ex = PSE.getBackedgeTakenCount(); const SCEV *ScStart = AR->getStart(); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); @@ -1447,7 +1446,7 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(LoopAccessReport() << "could not determine number of loop iterations"); Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -5124,6 +5124,12 @@ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } +const SCEV * +ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Preds) { + return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); +} + /// getBackedgeTakenCount - If the specified loop has a predictable /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute /// object. The backedge-taken count is the number of times the loop header @@ -5159,6 +5165,23 @@ } const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { + auto &BTI = getBackedgeTakenInfo(L); + if (BTI.hasFullInfo()) + return BTI; + + std::pair::iterator, bool> Pair = + PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); + + if (!Pair.second) + return Pair.first->second; + + BackedgeTakenInfo Result = computeBackedgeTakenCount(L, true); + + return PredicatedBackedgeTakenCounts.find(L)->second = Result; +} + +const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and @@ -5173,7 +5196,7 @@ // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. - BackedgeTakenInfo Result = computeBackedgeTakenCount(L); + BackedgeTakenInfo Result = computeBackedgeTakenCount(L, false); if (Result.getExact(this) != getCouldNotCompute()) { assert(isLoopInvariant(Result.getExact(this), L) && @@ -5238,12 +5261,17 @@ /// compute a trip count, or if the loop is deleted. void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. - DenseMap::iterator BTCPos = - BackedgeTakenCounts.find(L); - if (BTCPos != BackedgeTakenCounts.end()) { - BTCPos->second.clear(); - BackedgeTakenCounts.erase(BTCPos); - } + auto RemoveLoopFromBackedgeMap = + [L] (DenseMap &Map) { + auto BTCPos = Map.find(L); + if (BTCPos != Map.end()) { + BTCPos->second.clear(); + Map.erase(BTCPos); + } + }; + + RemoveLoopFromBackedgeMap(BackedgeTakenCounts); + RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); // Drop information about expressions based on loop-header PHIs. SmallVector Worklist; @@ -5312,7 +5340,8 @@ /// is the caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { +ScalarEvolution::BackedgeTakenInfo::getExact( + ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); @@ -5330,6 +5359,12 @@ BECount = ENT->ExactNotTaken; else if (BECount != ENT->ExactNotTaken) return SE->getCouldNotCompute(); + + if (Preds) + Preds->add(&ENT->Pred); + + assert((Preds || ENT->Pred.isAlwaysTrue()) && + "Predicate should be always true!"); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; @@ -5339,10 +5374,10 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr; + ENT = ENT->getNextExit()) { - if (ENT->ExitingBlock == ExitingBlock) + if (ENT->ExitingBlock == ExitingBlock && ENT->Pred.isAlwaysTrue()) return ENT->ExactNotTaken; } return SE->getCouldNotCompute(); @@ -5351,6 +5386,12 @@ /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr; + ENT = ENT->getNextExit()) { + if (!ENT->Pred.isAlwaysTrue()) + return SE->getCouldNotCompute(); + } + return Max ? Max : SE->getCouldNotCompute(); } @@ -5376,8 +5417,10 @@ /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( - SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + SmallVectorImpl> &ExitCounts, + SmallVectorImpl &ExitPreds, bool Complete, + const SCEV *MaxCount) + : Max(MaxCount) { if (!Complete) ExitNotTaken.setIncomplete(); @@ -5387,16 +5430,22 @@ ExitNotTaken.ExitingBlock = ExitCounts[0].first; ExitNotTaken.ExactNotTaken = ExitCounts[0].second; + + assert(ExitCounts.size() == ExitPreds.size() && + "ExitCounts should have the same size as ExitPreds"); + ExitNotTaken.Pred.add(ExitPreds[0]); + if (NumExits == 1) return; // Handle the rare case of multiple computable exits. - ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; + ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits - 1]; ExitNotTakenInfo *PrevENT = &ExitNotTaken; for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) { PrevENT->setNextExit(ENT); ENT->ExitingBlock = ExitCounts[i].first; ENT->ExactNotTaken = ExitCounts[i].second; + ENT->Pred.add(ExitPreds[i]); } } @@ -5410,11 +5459,12 @@ /// computeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L, bool Guarded) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); SmallVector, 4> ExitCounts; + SmallVector ExitCountPreds; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; @@ -5422,9 +5472,10 @@ // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. + // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; - ExitLimit EL = computeExitLimit(L, ExitBB); + ExitLimit EL = computeExitLimit(L, ExitBB, Guarded); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. @@ -5432,8 +5483,10 @@ // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; - else + else { ExitCounts.push_back({ExitBB, EL.Exact}); + ExitCountPreds.push_back(&EL.Pred); + } // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: @@ -5463,11 +5516,13 @@ } const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); - return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); + return BackedgeTakenInfo(ExitCounts, ExitCountPreds, CouldComputeBECount, + MaxBECount); } ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool Guarded) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets @@ -5534,7 +5589,7 @@ // Proceed to the next level to examine the exit condition expression. return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), - /*ControlsExit=*/IsOnlyExit); + /*ControlsExit=*/IsOnlyExit, Guarded); } if (SwitchInst *SI = dyn_cast(Term)) @@ -5557,16 +5612,19 @@ Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool Guarded) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + Guarded); ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + Guarded); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -5593,6 +5651,9 @@ BECount = EL0.Exact; } + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact @@ -5601,15 +5662,17 @@ !isa(BECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, NP); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + Guarded); ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + Guarded); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -5636,14 +5699,25 @@ BECount = EL0.Exact; } - return ExitLimit(BECount, MaxBECount); + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); + return ExitLimit(BECount, MaxBECount, NP); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. - if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) { + ExitLimit EL = computeExitLimitFromICmp(L, ExitCondICmp, TBB, + FBB, ControlsExit, false); + if (EL.hasFullInfo() || !Guarded) + return EL; + + // Try again, but use SCEV predicates this time. + return computeExitLimitFromICmp(L, ExitCondICmp, TBB, + FBB, ControlsExit, true); + } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5667,7 +5741,8 @@ ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool CreatePredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -5724,7 +5799,8 @@ switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, + CreatePredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -5737,14 +5813,17 @@ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Cond == ICmpInst::ICMP_SLT; - ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, + CreatePredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Cond == ICmpInst::ICMP_SGT; - ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = + HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, + CreatePredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -6006,7 +6085,8 @@ unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); - return ExitLimit(getCouldNotCompute(), UpperBound); + SCEVUnionPredicate P; + return ExitLimit(getCouldNotCompute(), UpperBound, P); } return getCouldNotCompute(); @@ -6783,7 +6863,9 @@ /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, + bool CreatePredicates) { + SCEVUnionPredicate P; // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -6792,6 +6874,14 @@ } const SCEVAddRecExpr *AddRec = dyn_cast(V); + if (!AddRec && CreatePredicates) { + // Try to make this a chrec using runtime tests. + const SCEV *Expr = convertSCEVToAddRecWithPredicates(V, L, P); + AddRec = dyn_cast(Expr); + if (!AddRec) + return getCouldNotCompute(); + } + if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); @@ -6816,7 +6906,7 @@ // should not accept a root of 2. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) - return R1; // We found a quadratic root! + return ExitLimit(R1, R1, P); // We found a quadratic root! } } return getCouldNotCompute(); @@ -6873,7 +6963,7 @@ else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount); + return ExitLimit(Distance, MaxBECount, P); } // As a special case, handle the instance where Step is a positive power of @@ -6926,7 +7016,9 @@ auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); auto *WideTy = Distance->getType(); - return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + const SCEV *Limit = + getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + return ExitLimit(Limit, Limit, P); } } @@ -6938,13 +7030,15 @@ if (ControlsExit && AddRec->hasNoSelfWrap()) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact); + return ExitLimit(Exact, Exact, P); } // Then, try to solve the above equation provided that Start is constant. - if (const SCEVConstant *StartC = dyn_cast(Start)) - return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), - *this); + if (const SCEVConstant *StartC = dyn_cast(Start)) { + const SCEV *E = SolveLinEquationWithOverflow( + StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); + return ExitLimit(E, E, P); + } return getCouldNotCompute(); } @@ -8387,12 +8481,20 @@ ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool CreatePredicates) { + SCEVUnionPredicate P; // We handle only IV < Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if (!IV && CreatePredicates) { + // Try to make this a chrec using runtime tests. + const SCEV *Expr = convertSCEVToAddRecWithPredicates(LHS, L, P); + IV = dyn_cast(Expr); + if (!IV) + return getCouldNotCompute(); + } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8461,18 +8563,27 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } ScalarEvolution::ExitLimit ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, + bool CreatePredicates) { + SCEVUnionPredicate P; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if (!IV && CreatePredicates) { + // Try to make this a chrec using runtime tests. + const SCEV *Expr = convertSCEVToAddRecWithPredicates(LHS, L, P); + IV = dyn_cast(Expr); + if (!IV) + return getCouldNotCompute(); + } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8543,7 +8654,7 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -9247,6 +9358,8 @@ ValueExprMap(std::move(Arg.ValueExprMap)), WalkingBEDominatingConds(false), ProvingSplitPredicate(false), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), + PredicatedBackedgeTakenCounts( + std::move(Arg.PredicatedBackedgeTakenCounts)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), @@ -9279,6 +9392,8 @@ // that a loop had multiple computable exits. for (auto &BTCI : BackedgeTakenCounts) BTCI.second.clear(); + for (auto &BTCI : PredicatedBackedgeTakenCounts) + BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); @@ -9321,6 +9436,20 @@ OS << "Unpredictable max backedge-taken count. "; } + OS << "\n" + "Loop "; + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": "; + + SCEVUnionPredicate Pred; + auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); + if (!isa(PBT)) { + OS << "Predicated backedge-taken count is " << *PBT << "\n"; + OS << " Predicates:\n"; + Pred.print(OS, 4); + } else { + OS << "Unpredictable predicated backedge-taken count. "; + } OS << "\n"; } @@ -9605,16 +9734,21 @@ ExprValueMap.erase(S); HasRecMap.erase(S); - for (DenseMap::iterator I = - BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { - BackedgeTakenInfo &BEInfo = I->second; - if (BEInfo.hasOperand(S, this)) { - BEInfo.clear(); - BackedgeTakenCounts.erase(I++); + auto RemoveSCEVFromBackedgeMap = + [S, this] (DenseMap &Map) { + for (auto I = Map.begin(), E = Map.end(); I != E;) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + Map.erase(I++); + } + else + ++I; } - else - ++I; - } + }; + + RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); + RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } typedef DenseMap VerifyMap; @@ -10019,7 +10153,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) - : SE(SE), L(L), Generation(0) {} + : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); @@ -10040,6 +10174,15 @@ return NewSCEV; } +const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { + if (!BackedgeCount) { + SCEVUnionPredicate BackedgePred; + BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); + addPredicate(BackedgePred); + } + return BackedgeCount; +} + void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds.implies(&Pred)) return; @@ -10101,10 +10244,10 @@ return New; } -PredicatedScalarEvolution:: -PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) : - RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), - Generation(Init.Generation) { +PredicatedScalarEvolution::PredicatedScalarEvolution( + const PredicatedScalarEvolution &Init) + : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), + Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I) FlagsMap.insert(*I); } Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -2003,7 +2003,9 @@ assert(AR->isAffine() && "Cannot generate RT check for " "non-affine expression"); - const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop()); + SCEVUnionPredicate Pred; + const SCEV *ExitCount = SE.getPredicatedBackedgeTakenCount(AR->getLoop(), + Pred); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2727,7 +2727,7 @@ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); + const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -4368,7 +4368,7 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(VectorizationReport() << "could not determine number of loop iterations"); Index: test/Analysis/ScalarEvolution/predicated-trip-count.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A = weak global [1000 x i32] zeroinitializer, align 32 + +; The resulting predicate is i16 {0,+,1} , meanining +; that the resulting backedge expression will be valid for: +; (1 + (-1 smax %M)) <= MAX_INT16 +; +; At the limit condition for M (MAX_INT16 - 1) we have in the +; last iteration: +; i0 <- MAX_INT16 +; i0.ext <- MAX_INT16 +; +; and therefore no wrapping happend for i0 or i0.ext +; throughout the execution of the loop. The resulting predicated +; backedge taken count is correct. + +; CHECK: Classifying expressions for: @test1 +; CHECK: %i.0.ext = sext i16 %i.0 to i32 +; CHECK-NEXT: --> (sext i16 {0,+,1}<%bb3> to i32) +; CHECK: Loop %bb3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%bb3> Added Flags: +define void @test1(i32 %N, i32 %M) { +entry: + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = add i16 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %i.0.ext = sext i16 %i.0 to i32 + %tmp3 = icmp sle i32 %i.0.ext, %M ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + +; The predicated backedge taken count is: +; (2 + (zext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) +; smax (-1 + (-1 * %M))) +; ) + +; -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32)) +; The predicated backedge taken count is 0. +; From the IR, this is correct since we will bail out at the +; first iteration. + + +; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32)) +; or: %M < 1 + (sext i16 %Start to i32) +; +; The predicated backedge taken count is 1 + (zext i16 %Start to i32) - %M +; +; If %M >= MIN_INT + 1, this predicated backedge taken count would be correct (even +; without predicates). However, for %M < MIN_INT this would be an infinite loop. +; In these cases, the {%Start,+,-1} predicate would be false, as the +; final value of the expression {%Start,+,-1} expression (%M - 1) would not be +; representable as an i16. + +; There is also a limit case here where the value of %M is MIN_INT. In this case +; we still have an infinite loop, since icmp sge %x, MIN_INT will always return +; true. + +; CHECK: Classifying expressions for: @test2 + +; CHECK: %i.0.ext = sext i16 %i.0 to i32 +; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) +; CHECK: Loop %bb3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: + +define void @test2(i32 %N, i32 %M, i16 %Start) { +entry: + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = sub i16 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %i.0.ext = sext i16 %i.0 to i32 + %tmp3 = icmp sge i32 %i.0.ext, %M ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + Index: test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -0,0 +1,166 @@ +; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s + +; The following tests contain loops for which SCEV cannot determine the backedge +; taken count. This is because the backedge taken condition is produced by an +; icmp with one of the sides being a loop varying non-AddRec expression. +; However, there is a possibility to normalize this to an AddRec expression +; using SCEV predicates. This allows us to compute a 'guarded' backedge count. +; The Loop Vectorizer is able to version to loop in order to use this guarded +; backedge count and vectorize more loops. + + +; CHECK-LABEL: test_sge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_uge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_uge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, i32 %Offset) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + + %indvars.ext = sext i16 %indvars.iv to i32 + %indvars.access = add i32 %Offset, %indvars.ext + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = add i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp uge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_ule +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_ule(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp ule i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_sle +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sle(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = sext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sle i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll =================================================================== --- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -54,8 +54,9 @@ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16 %0 = trunc i64 %indvars.iv to i32, !dbg !16 + %ld = load i32, i32* %arrayidx, align 4 store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18 - %cmp3 = icmp sle i32 %0, %Length, !dbg !22 + %cmp3 = icmp sle i32 %ld, %Length, !dbg !22 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12 %1 = trunc i64 %indvars.iv.next to i32 %cmp = icmp slt i32 %1, %Length, !dbg !12