Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -270,10 +270,17 @@ } }; - /// SCEVWrapPredicate - This class represents an assumption - /// made on an AddRec expression. Given an affine AddRec expression - /// {a,+,b}, we assume that it has the nssw or nusw flags (defined - /// below). + /// SCEVWrapPredicate - This class represents an assumption made on an AddRec + /// expression. Given an affine AddRec expression {a,+,b}, we assume that it + /// has the nssw or nusw flags (defined below) in the first X iterations of + /// the loop, where X is a SCEV expression returned by + /// getPredicatedBackedgeTakenCount). + /// + /// Note that this does not imply that X is equal to the backedge taken + /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a + /// predicated backedge taken count of X, we only guarantee that {0,+,1} has + /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we + /// have more than X iterations. class SCEVWrapPredicate final : public SCEVPredicate { public: /// Similar to SCEV::NoWrapFlags, but with slightly different semantics @@ -520,9 +527,14 @@ const SCEV *Exact; const SCEV *Max; + /// A predicate union guard for this ExitLimit. The result is only + /// valid if this predicate evaluates to 'true' at run-time. + SCEVUnionPredicate Pred; + /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {} - ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) { + ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P) + : Exact(E), Max(M), Pred(P) { assert((isa(Exact) || !isa(Max)) && "Exact is not allowed to be less precise than Max"); @@ -534,30 +546,146 @@ return !isa(Exact) || !isa(Max); } + + /// Test whether this ExitLimit contains all information. + bool hasFullInfo() const { return !isa(Exact); } }; + /// Forward declaration of ExitNotTakenExtras + struct ExitNotTakenExtras; + /// Information about the number of times a particular loop exit may be /// reached before exiting the loop. struct ExitNotTakenInfo { AssertingVH ExitingBlock; const SCEV *ExactNotTaken; - PointerIntPair NextExit; + + PointerIntPair ExtraInfo; ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} + ExitNotTakenInfo(BasicBlock *ExitBlock, const SCEV *Expr, + ExitNotTakenExtras *Ptr) + : ExitingBlock(ExitBlock), ExactNotTaken(Expr) { + ExtraInfo.setPointer(Ptr); + } /// Return true if all loop exits are computable. - bool isCompleteList() const { - return NextExit.getInt() == 0; + bool isCompleteList() const { return ExtraInfo.getInt() == 0; } + + /// Sets the incomplete property, indicating that one of the loop exits + /// doesn't have a corresponding ExitNotTakenInfo entry. + void setIncomplete() { ExtraInfo.setInt(1); } + + /// Returns a pointer to the predicate associated with this information, + /// or nullptr if this doesn't exist (meaning always true). + SCEVUnionPredicate *getPred() const { + if (auto *Info = ExtraInfo.getPointer()) + return &Info->Pred; + + return nullptr; } - void setIncomplete() { NextExit.setInt(1); } + /// Return true if the SCEV predicate associated with this information + /// is always true. + bool hasAlwaysTruePred() const { + return !getPred() || getPred()->isAlwaysTrue(); + } - /// Return a pointer to the next exit's not-taken info. - ExitNotTakenInfo *getNextExit() const { - return NextExit.getPointer(); + /// Defines a simple forward iterator for ExitNotTakenInfo. + class ExitNotTakenInfoIterator + : public std::iterator { + const ExitNotTakenInfo *Start; + unsigned Position; + + public: + ExitNotTakenInfoIterator(const ExitNotTakenInfo *Start, + unsigned Position) + : Start(Start), Position(Position) {} + + const ExitNotTakenInfo &operator*() const { + if (Position == 0) + return *Start; + + return Start->ExtraInfo.getPointer()->Exits[Position - 1]; + } + + const ExitNotTakenInfo *operator->() const { + if (Position == 0) + return Start; + + return &Start->ExtraInfo.getPointer()->Exits[Position - 1]; + } + + bool operator==(const ExitNotTakenInfoIterator &RHS) const { + return Start == RHS.Start && Position == RHS.Position; + } + + bool operator!=(const ExitNotTakenInfoIterator &RHS) const { + return Start != RHS.Start || Position != RHS.Position; + } + + ExitNotTakenInfoIterator &operator++() { // Preincrement + if (!Start) + return *this; + + unsigned Elements = + Start->ExtraInfo.getPointer() + ? Start->ExtraInfo.getPointer()->Exits.size() + 1 + : 1; + + ++Position; + + // We've run out of elements. + if (Position == Elements) { + Start = nullptr; + Position = 0; + } + + return *this; + } + ExitNotTakenInfoIterator operator++(int) { // Postincrement + ExitNotTakenInfoIterator Tmp = *this; + ++*this; + return Tmp; + } + }; + + /// Iterators + ExitNotTakenInfoIterator begin() const { + return ExitNotTakenInfoIterator(this, 0); } + ExitNotTakenInfoIterator end() const { + return ExitNotTakenInfoIterator(nullptr, 0); + } + }; - void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); } + /// Describes the extra information that a ExitNotTakenInfo can have. + struct ExitNotTakenExtras { + /// The predicate associated with the ExitNotTakenInfo struct. + SCEVUnionPredicate Pred; + + /// The extra exits in the loop. Only the ExitNotTakenExtras structure + /// pointed to by the first ExitNotTakenInfo struct (associated with the + /// first loop exit) will populate this vector to prevent having + /// redundant information. + SmallVector Exits; + }; + + /// A struct containing the information attached to a backedge. + struct EdgeInfo { + EdgeInfo(BasicBlock *Block, const SCEV *Taken, SCEVUnionPredicate &P) : + ExitBlock(Block), Taken(Taken), Pred(std::move(P)) {} + + /// The exit basic block. + BasicBlock *ExitBlock; + + /// The (exact) number of time we take the edge back. + const SCEV *Taken; + + /// The SCEV predicated associated with Taken. If Pred doesn't evaluate + /// to true, the information in Taken is not valid (or equivalent with + /// a CouldNotCompute. + SCEVUnionPredicate Pred; }; /// Information about the backedge-taken count of a loop. This currently @@ -569,16 +697,16 @@ ExitNotTakenInfo ExitNotTaken; /// An expression indicating the least maximum backedge-taken count of the - /// loop that is known, or a SCEVCouldNotCompute. + /// loop that is known, or a SCEVCouldNotCompute. This expression is only + /// valid if the predicates associated with all loop exits are true. const SCEV *Max; public: BackedgeTakenInfo() : Max(nullptr) {} /// Initialize BackedgeTakenInfo from a list of exact exit counts. - BackedgeTakenInfo( - SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount); + BackedgeTakenInfo(SmallVectorImpl &ExitCounts, bool Complete, + const SCEV *MaxCount); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. @@ -586,11 +714,27 @@ return ExitNotTaken.ExitingBlock || !isa(Max); } + /// Test whether this BackedgeTakenInfo contains complete information. + bool hasFullInfo() const { return ExitNotTaken.isCompleteList(); } + /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the + /// loop if it is known or SCEVCouldNotCompute otherwise. This is the /// number of times the loop header can be guaranteed to execute, minus /// one. - const SCEV *getExact(ScalarEvolution *SE) const; + /// + /// If the SCEV predicate associated with the answer can be different + /// from AlwaysTrue, we must add a (non null) Predicates argument. + /// The SCEV predicate associated with the answer will be added to + /// Predicates. A run-time check needs to be emitted for the SCEV + /// predicate in order for the answer to be valid. + /// + /// Note that we should always know if we need to pass a predicate + /// argument or not from the way the ExitCounts vector was computed. + /// If we allowed SCEV predicates to be generated when populating this + /// vector, this information can contain them and therefore a + /// SCEVPredicate argument should be added to getExact. + const SCEV *getExact(ScalarEvolution *SE, + SCEVUnionPredicate *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via @@ -611,7 +755,11 @@ /// Cache the backedge-taken count of the loops for this function as they /// are computed. - DenseMap BackedgeTakenCounts; + DenseMap BackedgeTakenCounts; + + /// Cache the predicated backedge-taken count of the loops for this + /// function as they are computed. + DenseMap PredicatedBackedgeTakenCounts; /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid @@ -713,33 +861,49 @@ void forgetSymbolicName(Instruction *I, const SCEV *SymName); /// Return the BackedgeTakenInfo for the given loop, lazily computing new - /// values if the loop hasn't been analyzed yet. + /// values if the loop hasn't been analyzed yet. The returned result is + /// guaranteed not to be predicated. const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); + /// Similar to getBackedgeTakenInfo, but will add predicates as required + /// with the purpose of returning complete information. + const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); + /// Compute the number of times the specified loop will iterate. - BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L); + /// If AllowPredicates is set, we will create new SCEV predicates as + /// necessary in order to return an exact answer. + BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will - /// execute if it exits via the specified block. - ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock); + /// execute if it exits via the specified block. If AllowPredicates is set, + /// this call will try to use a minimal set of SCEV predicates in order to + /// return an exact answer. + ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond, - /// TBB, and FBB. + /// TBB, and FBB. If AllowPredicates is set, this call will try to use a + /// minimal set of SCEV predicates in order to return an exact answer. ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst - /// ExitCond, TBB, and FBB. + /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try + /// to use a minimal set of SCEV predicates in order to return an exact + /// answer. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case @@ -777,7 +941,10 @@ /// Return the number of times an exit condition comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. - ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); + /// If AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, + bool AllowPredicates = false); /// Return the number of times an exit condition checking the specified /// value for nonzero will execute. If not computable, return @@ -787,10 +954,15 @@ /// Return the number of times an exit condition containing the specified /// less-than comparison will execute. If not computable, return /// CouldNotCompute. isSigned specifies whether the less-than is signed. - ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + /// If AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, + bool isSigned, bool IsSubExpr, + bool AllowPredicates = false); + ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + const Loop *L, bool isSigned, bool IsSubExpr, + bool AllowPredicates = false); /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if @@ -1168,6 +1340,13 @@ /// const SCEV *getBackedgeTakenCount(const Loop *L); + /// Similar to getBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Predicates); + /// Similar to getBackedgeTakenCount, except return the least SCEV value /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); @@ -1493,6 +1672,8 @@ /// by ScalarEvolution is guaranteed to be preserved, even when adding new /// predicates. const SCEV *getSCEV(Value *V); + /// Get the (predicated) backedge count for the analyzed loop. + const SCEV *getBackedgeTakenCount(); /// \brief Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); /// \brief Attempts to produce an AddRecExpr for V by adding additional @@ -1536,6 +1717,8 @@ /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. unsigned Generation; + /// The backedge taken count. + const SCEV *BackedgeCount; }; } Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -140,7 +140,7 @@ else { const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); - const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + const SCEV *Ex = PSE.getBackedgeTakenCount(); ScStart = AR->getStart(); ScEnd = AR->evaluateAtIteration(Ex, *SE); @@ -1460,7 +1460,7 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(LoopAccessReport() << "could not determine number of loop iterations"); Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -5223,6 +5223,12 @@ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } +const SCEV * +ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Preds) { + return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); +} + /// getBackedgeTakenCount - If the specified loop has a predictable /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute /// object. The backedge-taken count is the number of times the loop header @@ -5258,6 +5264,23 @@ } const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { + auto &BTI = getBackedgeTakenInfo(L); + if (BTI.hasFullInfo()) + return BTI; + + auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); + + if (!Pair.second) + return Pair.first->second; + + BackedgeTakenInfo Result = + computeBackedgeTakenCount(L, /*AllowPredicates=*/true); + + return PredicatedBackedgeTakenCounts.find(L)->second = Result; +} + +const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and @@ -5337,12 +5360,17 @@ /// compute a trip count, or if the loop is deleted. void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. - DenseMap::iterator BTCPos = - BackedgeTakenCounts.find(L); - if (BTCPos != BackedgeTakenCounts.end()) { - BTCPos->second.clear(); - BackedgeTakenCounts.erase(BTCPos); - } + auto RemoveLoopFromBackedgeMap = + [L](DenseMap &Map) { + auto BTCPos = Map.find(L); + if (BTCPos != Map.end()) { + BTCPos->second.clear(); + Map.erase(BTCPos); + } + }; + + RemoveLoopFromBackedgeMap(BackedgeTakenCounts); + RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); // Drop information about expressions based on loop-header PHIs. SmallVector Worklist; @@ -5411,7 +5439,8 @@ /// is the caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { +ScalarEvolution::BackedgeTakenInfo::getExact( + ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); @@ -5420,16 +5449,20 @@ assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); const SCEV *BECount = nullptr; - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { - - assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + for (auto &ENT : ExitNotTaken) { + assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); if (!BECount) - BECount = ENT->ExactNotTaken; - else if (BECount != ENT->ExactNotTaken) + BECount = ENT.ExactNotTaken; + else if (BECount != ENT.ExactNotTaken) return SE->getCouldNotCompute(); + if (Preds && ENT.getPred()) + Preds->add(ENT.getPred()); + + assert((Preds || ENT.hasAlwaysTruePred()) && + "Predicate should be always true!"); } + assert(BECount && "Invalid not taken count for loop exit"); return BECount; } @@ -5438,18 +5471,20 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { + for (auto &ENT : ExitNotTaken) + if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred()) + return ENT.ExactNotTaken; - if (ENT->ExitingBlock == ExitingBlock) - return ENT->ExactNotTaken; - } return SE->getCouldNotCompute(); } /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + for (auto &ENT : ExitNotTaken) + if (!ENT.hasAlwaysTruePred()) + return SE->getCouldNotCompute(); + return Max ? Max : SE->getCouldNotCompute(); } @@ -5461,22 +5496,19 @@ if (!ExitNotTaken.ExitingBlock) return false; - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { - - if (ENT->ExactNotTaken != SE->getCouldNotCompute() - && SE->hasOperand(ENT->ExactNotTaken, S)) { + for (auto &ENT : ExitNotTaken) + if (ENT.ExactNotTaken != SE->getCouldNotCompute() && + SE->hasOperand(ENT.ExactNotTaken, S)) return true; - } - } + return false; } /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( - SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + SmallVectorImpl &ExitCounts, bool Complete, const SCEV *MaxCount) + : Max(MaxCount) { if (!Complete) ExitNotTaken.setIncomplete(); @@ -5484,18 +5516,43 @@ unsigned NumExits = ExitCounts.size(); if (NumExits == 0) return; - ExitNotTaken.ExitingBlock = ExitCounts[0].first; - ExitNotTaken.ExactNotTaken = ExitCounts[0].second; - if (NumExits == 1) return; + ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock; + ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken; + + // Determine the number of ExitNotTakenExtras structures that we need. + unsigned ExtraInfoSize = 0; + if (NumExits > 1) + ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()), + ExitCounts.end(), [](EdgeInfo &Entry) { + return !Entry.Pred.isAlwaysTrue(); + }); + else if (!ExitCounts[0].Pred.isAlwaysTrue()) + ExtraInfoSize = 1; + + ExitNotTakenExtras *ENT = nullptr; + + // Allocate the ExitNotTakenExtras structures and initialize the first + // element (ExitNotTaken). + if (ExtraInfoSize > 0) { + ENT = new ExitNotTakenExtras[ExtraInfoSize]; + ExitNotTaken.ExtraInfo.setPointer(&ENT[0]); + *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred); + } + + if (NumExits == 1) + return; + + auto &Exits = ExitNotTaken.ExtraInfo.getPointer()->Exits; // Handle the rare case of multiple computable exits. - ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; + for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) { + ExitNotTakenExtras *Ptr = nullptr; + if (!ExitCounts[i].Pred.isAlwaysTrue()) { + Ptr = &ENT[PredPos++]; + Ptr->Pred = std::move(ExitCounts[i].Pred); + } - ExitNotTakenInfo *PrevENT = &ExitNotTaken; - for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) { - PrevENT->setNextExit(ENT); - ENT->ExitingBlock = ExitCounts[i].first; - ENT->ExactNotTaken = ExitCounts[i].second; + Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr); } } @@ -5503,17 +5560,18 @@ void ScalarEvolution::BackedgeTakenInfo::clear() { ExitNotTaken.ExitingBlock = nullptr; ExitNotTaken.ExactNotTaken = nullptr; - delete[] ExitNotTaken.getNextExit(); + delete[] ExitNotTaken.ExtraInfo.getPointer(); } /// computeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L, + bool AllowPredicates) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - SmallVector, 4> ExitCounts; + SmallVector ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; @@ -5521,9 +5579,13 @@ // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. + // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; - ExitLimit EL = computeExitLimit(L, ExitBB); + ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); + + assert((AllowPredicates || EL.Pred.isAlwaysTrue()) && + "Predicated exit limit when predicates are not allowed!"); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. @@ -5532,7 +5594,7 @@ // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.push_back({ExitBB, EL.Exact}); + ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred)); // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: @@ -5566,7 +5628,8 @@ } ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool AllowPredicates) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets @@ -5631,9 +5694,9 @@ if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. - return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), - BI->getSuccessor(1), - /*ControlsExit=*/IsOnlyExit); + return computeExitLimitFromCond( + L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), + /*ControlsExit=*/IsOnlyExit, AllowPredicates); } if (SwitchInst *SI = dyn_cast(Term)) @@ -5656,16 +5719,19 @@ Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -5692,6 +5758,9 @@ BECount = EL0.Exact; } + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact @@ -5700,15 +5769,17 @@ !isa(BECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, NP); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -5735,14 +5806,25 @@ BECount = EL0.Exact; } - return ExitLimit(BECount, MaxBECount); + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); + return ExitLimit(BECount, MaxBECount, NP); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. - if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) { + ExitLimit EL = + computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + if (EL.hasFullInfo() || !AllowPredicates) + return EL; + + // Try again, but use SCEV predicates this time. + return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit, + /*AllowPredicates=*/true); + } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5766,7 +5848,8 @@ ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -5823,7 +5906,8 @@ switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -5836,14 +5920,17 @@ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Cond == ICmpInst::ICMP_SLT; - ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Cond == ICmpInst::ICMP_SGT; - ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = + HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -6105,7 +6192,8 @@ unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); - return ExitLimit(getCouldNotCompute(), UpperBound); + SCEVUnionPredicate P; + return ExitLimit(getCouldNotCompute(), UpperBound, P); } return getCouldNotCompute(); @@ -6882,7 +6970,9 @@ /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, + bool AllowPredicates) { + SCEVUnionPredicate P; // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -6891,6 +6981,12 @@ } const SCEVAddRecExpr *AddRec = dyn_cast(V); + if (!AddRec && AllowPredicates) + // Try to make this an AddRec using runtime tests, in the first X + // iterations of this loop, where X is the SCEV expression found by the + // algorithm below. + AddRec = convertSCEVToAddRecWithPredicates(V, L, P); + if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); @@ -6915,7 +7011,7 @@ // should not accept a root of 2. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) - return R1; // We found a quadratic root! + return ExitLimit(R1, R1, P); // We found a quadratic root! } } return getCouldNotCompute(); @@ -6972,7 +7068,7 @@ else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount); + return ExitLimit(Distance, MaxBECount, P); } // As a special case, handle the instance where Step is a positive power of @@ -7025,7 +7121,9 @@ auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); auto *WideTy = Distance->getType(); - return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + const SCEV *Limit = + getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + return ExitLimit(Limit, Limit, P); } } @@ -7037,13 +7135,15 @@ if (ControlsExit && AddRec->hasNoSelfWrap()) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact); + return ExitLimit(Exact, Exact, P); } // Then, try to solve the above equation provided that Start is constant. - if (const SCEVConstant *StartC = dyn_cast(Start)) - return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), - *this); + if (const SCEVConstant *StartC = dyn_cast(Start)) { + const SCEV *E = SolveLinEquationWithOverflow( + StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); + return ExitLimit(E, E, P); + } return getCouldNotCompute(); } @@ -8486,12 +8586,18 @@ ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool AllowPredicates) { + SCEVUnionPredicate P; // We handle only IV < Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if (!IV && AllowPredicates) + // Try to make this an AddRec using runtime tests, in the first X + // iterations of this loop, where X is the SCEV expression found by the + // algorithm below. + IV = convertSCEVToAddRecWithPredicates(LHS, L, P); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8560,18 +8666,24 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } ScalarEvolution::ExitLimit ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool AllowPredicates) { + SCEVUnionPredicate P; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if (!IV && AllowPredicates) + // Try to make this an AddRec using runtime tests, in the first X + // iterations of this loop, where X is the SCEV expression found by the + // algorithm below. + IV = convertSCEVToAddRecWithPredicates(LHS, L, P); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8642,7 +8754,7 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -9346,6 +9458,8 @@ ValueExprMap(std::move(Arg.ValueExprMap)), WalkingBEDominatingConds(false), ProvingSplitPredicate(false), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), + PredicatedBackedgeTakenCounts( + std::move(Arg.PredicatedBackedgeTakenCounts)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), @@ -9378,6 +9492,8 @@ // that a loop had multiple computable exits. for (auto &BTCI : BackedgeTakenCounts) BTCI.second.clear(); + for (auto &BTCI : PredicatedBackedgeTakenCounts) + BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); @@ -9420,6 +9536,20 @@ OS << "Unpredictable max backedge-taken count. "; } + OS << "\n" + "Loop "; + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": "; + + SCEVUnionPredicate Pred; + auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); + if (!isa(PBT)) { + OS << "Predicated backedge-taken count is " << *PBT << "\n"; + OS << " Predicates:\n"; + Pred.print(OS, 4); + } else { + OS << "Unpredictable predicated backedge-taken count. "; + } OS << "\n"; } @@ -9704,16 +9834,20 @@ ExprValueMap.erase(S); HasRecMap.erase(S); - for (DenseMap::iterator I = - BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { - BackedgeTakenInfo &BEInfo = I->second; - if (BEInfo.hasOperand(S, this)) { - BEInfo.clear(); - BackedgeTakenCounts.erase(I++); - } - else - ++I; - } + auto RemoveSCEVFromBackedgeMap = + [S, this](DenseMap &Map) { + for (auto I = Map.begin(), E = Map.end(); I != E;) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + Map.erase(I++); + } else + ++I; + } + }; + + RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); + RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } typedef DenseMap VerifyMap; @@ -10128,7 +10262,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) - : SE(SE), L(L), Generation(0) {} + : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); @@ -10149,6 +10283,15 @@ return NewSCEV; } +const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { + if (!BackedgeCount) { + SCEVUnionPredicate BackedgePred; + BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); + addPredicate(BackedgePred); + } + return BackedgeCount; +} + void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds.implies(&Pred)) return; @@ -10214,10 +10357,10 @@ return New; } -PredicatedScalarEvolution:: -PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) : - RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), - Generation(Init.Generation) { +PredicatedScalarEvolution::PredicatedScalarEvolution( + const PredicatedScalarEvolution &Init) + : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), + Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I) FlagsMap.insert(*I); } Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -2004,7 +2004,9 @@ assert(AR->isAffine() && "Cannot generate RT check for " "non-affine expression"); - const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop()); + SCEVUnionPredicate Pred; + const SCEV *ExitCount = + SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2778,7 +2778,7 @@ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); + const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -4425,7 +4425,7 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(VectorizationReport() << "could not determine number of loop iterations"); Index: test/Analysis/ScalarEvolution/predicated-trip-count.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A = weak global [1000 x i32] zeroinitializer, align 32 + +; The resulting predicate is i16 {0,+,1} , meanining +; that the resulting backedge expression will be valid for: +; (1 + (-1 smax %M)) <= MAX_INT16 +; +; At the limit condition for M (MAX_INT16 - 1) we have in the +; last iteration: +; i0 <- MAX_INT16 +; i0.ext <- MAX_INT16 +; +; and therefore no wrapping happend for i0 or i0.ext +; throughout the execution of the loop. The resulting predicated +; backedge taken count is correct. + +; CHECK: Classifying expressions for: @test1 +; CHECK: %i.0.ext = sext i16 %i.0 to i32 +; CHECK-NEXT: --> (sext i16 {0,+,1}<%bb3> to i32) +; CHECK: Loop %bb3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%bb3> Added Flags: +define void @test1(i32 %N, i32 %M) { +entry: + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = add i16 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %i.0.ext = sext i16 %i.0 to i32 + %tmp3 = icmp sle i32 %i.0.ext, %M ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + +; The predicated backedge taken count is: +; (2 + (zext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) +; smax (-1 + (-1 * %M))) +; ) + +; -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32)) +; The predicated backedge taken count is 0. +; From the IR, this is correct since we will bail out at the +; first iteration. + + +; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32)) +; or: %M < 1 + (sext i16 %Start to i32) +; +; The predicated backedge taken count is 1 + (zext i16 %Start to i32) - %M +; +; If %M >= MIN_INT + 1, this predicated backedge taken count would be correct (even +; without predicates). However, for %M < MIN_INT this would be an infinite loop. +; In these cases, the {%Start,+,-1} predicate would be false, as the +; final value of the expression {%Start,+,-1} expression (%M - 1) would not be +; representable as an i16. + +; There is also a limit case here where the value of %M is MIN_INT. In this case +; we still have an infinite loop, since icmp sge %x, MIN_INT will always return +; true. + +; CHECK: Classifying expressions for: @test2 + +; CHECK: %i.0.ext = sext i16 %i.0 to i32 +; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) +; CHECK: Loop %bb3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: + +define void @test2(i32 %N, i32 %M, i16 %Start) { +entry: + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = sub i16 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %i.0.ext = sext i16 %i.0 to i32 + %tmp3 = icmp sge i32 %i.0.ext, %M ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + Index: test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -0,0 +1,166 @@ +; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s + +; The following tests contain loops for which SCEV cannot determine the backedge +; taken count. This is because the backedge taken condition is produced by an +; icmp with one of the sides being a loop varying non-AddRec expression. +; However, there is a possibility to normalize this to an AddRec expression +; using SCEV predicates. This allows us to compute a 'guarded' backedge count. +; The Loop Vectorizer is able to version to loop in order to use this guarded +; backedge count and vectorize more loops. + + +; CHECK-LABEL: test_sge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_uge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_uge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, i32 %Offset) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + + %indvars.ext = sext i16 %indvars.iv to i32 + %indvars.access = add i32 %Offset, %indvars.ext + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = add i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp uge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_ule +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_ule(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp ule i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_sle +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sle(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = sext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sle i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll =================================================================== --- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -54,8 +54,9 @@ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16 %0 = trunc i64 %indvars.iv to i32, !dbg !16 + %ld = load i32, i32* %arrayidx, align 4 store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18 - %cmp3 = icmp sle i32 %0, %Length, !dbg !22 + %cmp3 = icmp sle i32 %ld, %Length, !dbg !22 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12 %1 = trunc i64 %indvars.iv.next to i32 %cmp = icmp slt i32 %1, %Length, !dbg !12