Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -1741,6 +1741,12 @@ const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, bool Equality); + // Compute the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + const SCEV *computeMaxBECount(const SCEV *Start, const SCEV *Stride, + const SCEV *End, unsigned BitWidth, + bool IsSigned); + /// Verify if an linear IV with positive stride can overflow when in a /// less-than comparison, knowing the invariant term of the comparison, /// the stride and the knowledge of NSW/NUW flags on the recurrence. Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -9672,14 +9672,51 @@ return getUDivExpr(Delta, Step); } +const SCEV *ScalarEvolution::computeMaxBECount(const SCEV *Start, + const SCEV *Stride, + const SCEV *End, + unsigned BitWidth, + bool IsSigned) { + + // Calculate the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + const SCEV *MaxBECount; + APInt MinStart = + IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); + + APInt StrideForMaxBECount; + + bool PositiveStride = isKnownPositive(Stride); + if (PositiveStride) + StrideForMaxBECount = + IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); + else + // Using a stride of 1 is safe when computing max backedge taken count for + // a loop with unknown stride. + StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); + + APInt Limit = + IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1) + : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS of the loop termination condition. This is safe because + // in the other case (End - Start) is zero, leading to a zero maximum backedge + // taken count. + APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) + : APIntOps::umin(getUnsignedRangeMax(End), Limit); + + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), + getConstant(StrideForMaxBECount), false); + + return MaxBECount; +} + ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; - // We handle only IV < Invariant - if (!isLoopInvariant(RHS, L)) - return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; @@ -9762,6 +9799,17 @@ : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; + // When the RHS is not invariant, we do not know the end bound of the loop and + // cannot calculate the ExactBECount needed by ExitLimit. However, we can + // calculate the MaxBECount, given the start, stride and max value for the end + // bound of the loop (RHS), and the fact that IV does not overflow (which is + // checked above). + if (!isLoopInvariant(RHS, L)) { + const SCEV *MaxBECount = computeMaxBECount( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); + return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, + false /*MaxOrZero*/, Predicates); + } // If the backedge is taken at least once, then it will be taken // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start // is the LHS value of the less-than comparison the first time it is evaluated @@ -9794,37 +9842,8 @@ MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { - // Calculate the maximum backedge count based on the range of values - // permitted by Start, End, and Stride. - APInt MinStart = IsSigned ? getSignedRangeMin(Start) - : getUnsignedRangeMin(Start); - - unsigned BitWidth = getTypeSizeInBits(LHS->getType()); - - APInt StrideForMaxBECount; - - if (PositiveStride) - StrideForMaxBECount = - IsSigned ? getSignedRangeMin(Stride) - : getUnsignedRangeMin(Stride); - else - // Using a stride of 1 is safe when computing max backedge taken count for - // a loop with unknown stride. - StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); - - APInt Limit = - IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1) - : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1); - - // Although End can be a MAX expression we estimate MaxEnd considering only - // the case End = RHS. This is safe because in the other case (End - Start) - // is zero, leading to a zero maximum backedge taken count. - APInt MaxEnd = - IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit) - : APIntOps::umin(getUnsignedRangeMax(RHS), Limit); - - MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), - getConstant(StrideForMaxBECount), false); + MaxBECount = computeMaxBECount(Start, Stride, RHS, + getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(MaxBECount) && Index: test/Analysis/ScalarEvolution/max-trip-count.ll =================================================================== --- test/Analysis/ScalarEvolution/max-trip-count.ll +++ test/Analysis/ScalarEvolution/max-trip-count.ll @@ -288,3 +288,75 @@ exit: ret i32 0 } + +; The end bound of the loop can change between iterations, so the exact trip +; count is unknown, but SCEV can calculate the max trip count. +define void @changing_end_bound(i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 2147483646 +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, 1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; Similar test as above, but unknown start value. +; Also, there's no nsw on the iv.next, but SCEV knows +; the termination condition is LT, so the IV cannot wrap. +define void @changing_end_bound2(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is -1 +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add i32 %iv, 1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; changing end bound and greater than one stride +define void @changing_end_bound3(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound3 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 1073741823 +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, 4 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +}