diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -2022,12 +2022,15 @@ Optional>> createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI); - /// Compute the backedge taken count knowing the interval difference, and - /// the stride for an inequality. Result takes the form: - /// (Delta + (Stride - 1)) udiv Stride. - /// Caller must ensure that this expression either does not overflow or - /// that the result is undefined if it does. - const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride); + /// Compute ceil(N / D). N and D are treated as unsigned values. + /// + /// Since SCEV doesn't have native ceiling division, this generates a + /// SCEV expression of the following form: + /// + /// umin(N, 1) + floor((N - umin(N, 1)) / D) + /// + /// A denominator of zero or poison is handled the same way as getUDivExpr(). + const SCEV *getUDivCeilSCEV(const SCEV *N, const SCEV *D); /// Compute the maximum backedge count based on the range of values /// permitted by Start, End, and Stride. This is for loops of the form diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -11519,11 +11519,13 @@ return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); } -const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, - const SCEV *Step) { - const SCEV *One = getOne(Step->getType()); - Delta = getAddExpr(Delta, getMinusSCEV(Step, One)); - return getUDivExpr(Delta, Step); +const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) { + // umin(N, 1) + floor((N - umin(N, 1)) / D) + // This is equivalent to "1 + floor((N - 1) / D)" for N != 0. The umin + // expression fixes the case of N=0. + const SCEV *MinNOne = getUMinExpr(N, getOne(N->getType())); + const SCEV *NMinusOne = getMinusSCEV(N, MinNOne); + return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D)); } const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, @@ -11562,8 +11564,8 @@ APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) : APIntOps::umin(getUnsignedRangeMax(End), Limit); - MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, - getConstant(StrideForMaxBECount) /* Step */); + MaxBECount = getUDivCeilSCEV(getConstant(MaxEnd - MinStart) /* Delta */, + getConstant(StrideForMaxBECount) /* Step */); return MaxBECount; } @@ -11591,7 +11593,6 @@ auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW; bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType); - ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; const SCEV *Stride = IV->getStepRecurrence(*this); @@ -11704,7 +11705,6 @@ return RHS; } - const SCEV *End = RHS; // When the RHS is not invariant, we do not know the end bound of the loop and // cannot calculate the ExactBECount needed by ExitLimit. However, we can // calculate the MaxBECount, given the start, stride and max value for the end @@ -11716,38 +11716,71 @@ return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, false /*MaxOrZero*/, Predicates); } - // If the backedge is taken at least once, then it will be taken - // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start - // is the LHS value of the less-than comparison the first time it is evaluated - // and End is the RHS. - const SCEV *BECountIfBackedgeTaken = - computeBECount(getMinusSCEV(End, Start), Stride); - // If the loop entry is guarded by the result of the backedge test of the - // first loop iteration, then we know the backedge will be taken at least - // once and so the backedge taken count is as above. If not then we use the - // expression (max(End,Start)-Start)/Stride to describe the backedge count, - // as if the backedge is taken at least once max(End,Start) is End and so the - // result is as above, and if not max(End,Start) is Start so we get a backedge - // count of zero. + const SCEV *BECount; - if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(OrigStart, Stride), OrigRHS)) - BECount = BECountIfBackedgeTaken; - else { - // If we know that RHS >= Start in the context of loop, then we know that - // max(RHS, Start) = RHS at this point. - if (isLoopEntryGuardedByCond( - L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, OrigRHS, OrigStart)) + const SCEV *BECountIfBackedgeTaken = nullptr; + ICmpInst::Predicate CondGE = + IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + ICmpInst::Predicate CondGT = + IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + + if (isLoopEntryGuardedByCond(L, CondGT, RHS, + getMinusSCEV(OrigStart, Stride))) { + // FIXME: Missing check: + // isLoopEntryGuardedByCond(L, CondGT, Start, StartMinusStride) + // + // Suppose "Stride > 0", "Start - Stride" doesn't overflow, and + // "RHS > Start - Stride". + // + // If RHS >= Start, in general, the backedge-taken count is + // "ceil((RHS - Start) / Stride)". This is equvalent to + // "((RHS - Start) + (Stride - 1)) /u Stride" in arbitrary-precision math. + // We can reassociate that to "((RHS - 1) - (Start - Stride)) /u Stride". + // According to our preconditions, "RHS - 1" and "Start - Stride" can't + // overflow, and "((RHS - 1) - (Start - Stride))" fits into an unsigned + // integer, so we can just emit that expression directly. + // + // If "RHS < Start", in general, the backedge-taken count is zero. + // "RHS < Start" implies "((RHS - 1) - (Start - Stride)) < Stride", + // so the above formula evaluates to zero. + const SCEV *MinusOne = getMinusOne(Stride->getType()); + const SCEV *Numerator = + getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride)); + BECount = getUDivExpr(Numerator, Stride); + } else { + const SCEV *End; + if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart)) { End = RHS; - else + } else { + // If RHS < Start, the backedge will be taken zero times. So in + // general, we can write the backedge-taken count as: + // + // RHS >= Start ? ceil((RHS - Start) / Stride) : 0 + // + // We convert it to the following to make it more convenient for SCEV: + // + // ceil((max(RHS, Start) - Start) / Stride) End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); - BECount = computeBECount(getMinusSCEV(End, Start), Stride); + + // See what would happen if we assume the backedge is taken. This is + // used to compute MaxBECount. + BECountIfBackedgeTaken = getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride); + } + // Convert ceil(D / S) to floor((D + (S - 1)) / S). + // FIXME: The addition can overflow, in general. Will be addressed in a + // followup. + const SCEV *MinusOne = getMinusOne(Stride->getType()); + const SCEV *Delta = getMinusSCEV(End, Start); + BECount = + getUDivExpr(getAddExpr(Delta, getAddExpr(Stride, MinusOne)), Stride); } const SCEV *MaxBECount; bool MaxOrZero = false; - if (isa(BECount)) + if (isa(BECount)) { MaxBECount = BECount; - else if (isa(BECountIfBackedgeTaken)) { + } else if (BECountIfBackedgeTaken && + isa(BECountIfBackedgeTaken)) { // If we know exactly how many times the backedge will be taken if it's // taken at least once, then the backedge count will either be that or // zero. @@ -11826,7 +11859,13 @@ return End; } - const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride); + // Compute ((Start - End) + (Stride - 1)) / Stride. + // FIXME: Both the subtraction and the addition can overflow. Holding off on + // fixing this for now; we plan to merge howManyGreaterThans with + // howManyLessThans. + const SCEV *One = getOne(Stride->getType()); + const SCEV *BECount = getUDivExpr( + getAddExpr(getMinusSCEV(Start, End), getMinusSCEV(Stride, One)), Stride); APInt MaxStart = IsSigned ? getSignedRangeMax(Start) : getUnsignedRangeMax(Start); @@ -11847,11 +11886,8 @@ const SCEV *MaxBECount = isa(BECount) ? BECount - : computeBECount(getConstant(MaxStart - MinEnd), - getConstant(MinStride)); - - if (isa(MaxBECount)) - MaxBECount = BECount; + : getUDivCeilSCEV(getConstant(MaxStart - MinEnd), + getConstant(MinStride)); return ExitLimit(BECount, MaxBECount, false, Predicates); } diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll @@ -4,7 +4,7 @@ ; ScalarEvolution should be able to compute trip count of the loop by proving ; that this is not an infinite loop with side effects. -; CHECK: Determining loop execution counts for: @foo1 +; CHECK-LABEL: Determining loop execution counts for: @foo1 ; CHECK: backedge-taken count is ((-1 + %n) /u %s) ; We should have a conservative estimate for the max backedge taken count for @@ -34,7 +34,7 @@ ; Check that we are able to compute trip count of a loop without an entry guard. -; CHECK: Determining loop execution counts for: @foo2 +; CHECK-LABEL: Determining loop execution counts for: @foo2 ; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s) ; We should have a conservative estimate for the max backedge taken count for @@ -61,7 +61,7 @@ ; Check that without mustprogress we don't make assumptions about infinite ; loops being UB. -; CHECK: Determining loop execution counts for: @foo3 +; CHECK-LABEL: Determining loop execution counts for: @foo3 ; CHECK: Loop %for.body: Unpredictable backedge-taken count. ; CHECK: Loop %for.body: Unpredictable max backedge-taken count. @@ -84,7 +84,7 @@ } ; Same as foo2, but with mustprogress on loop, not function -; CHECK: Determining loop execution counts for: @foo4 +; CHECK-LABEL: Determining loop execution counts for: @foo4 ; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s) ; CHECK: max backedge-taken count is -1 @@ -106,5 +106,31 @@ ret void } +; A more complex case with pre-increment compare instead of post-increment. +; CHECK-LABEL: Determining loop execution counts for: @foo5 +; CHECK: Loop %for.body: backedge-taken count is ((-1 + (-1 * %start) + (%n smax %start) + %s) /u %s) + +; We should have a conservative estimate for the max backedge taken count for +; loops with unknown stride. +; CHECK: max backedge-taken count is -1 + +define void @foo5(i32* nocapture %A, i32 %n, i32 %s, i32 %start) mustprogress { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %add, %for.body ], [ %start, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05 + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %add = add nsw i32 %i.05, %s + %cmp = icmp slt i32 %i.05, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + !8 = distinct !{!8, !9} !9 = !{!"llvm.loop.mustprogress"}