Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -1741,6 +1741,12 @@ const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, bool Equality); + // Compute the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + const SCEV *computeMaxBECount(const SCEV *Start, const SCEV *Stride, + const SCEV *End, unsigned BitWidth, + bool IsSigned); + /// Verify if an linear IV with positive stride can overflow when in a /// less-than comparison, knowing the invariant term of the comparison, /// the stride and the knowledge of NSW/NUW flags on the recurrence. Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -9689,14 +9689,54 @@ return getUDivExpr(Delta, Step); } +const SCEV *ScalarEvolution::computeMaxBECount(const SCEV *Start, + const SCEV *Stride, + const SCEV *End, + unsigned BitWidth, + bool IsSigned) { + + assert(!isKnownNonPositive(Stride) && + "Stride is expected strictly positive!"); + // Calculate the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + const SCEV *MaxBECount; + APInt MinStart = + IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); + + APInt StrideForMaxBECount; + + bool PositiveStride = isKnownPositive(Stride); + if (PositiveStride) + StrideForMaxBECount = + IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); + else + // Using a stride of 1 is safe when computing max backedge taken count for + // a loop with unknown stride. + StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); + + APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth) + : APInt::getMaxValue(BitWidth); + APInt Limit = MaxValue - (StrideForMaxBECount - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS of the loop termination condition. This is safe because + // in the other case (End - Start) is zero, leading to a zero maximum backedge + // taken count. + APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) + : APIntOps::umin(getUnsignedRangeMax(End), Limit); + + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, + getConstant(StrideForMaxBECount) /* Step */, + false /* Equality */); + + return MaxBECount; +} + ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; - // We handle only IV < Invariant - if (!isLoopInvariant(RHS, L)) - return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; @@ -9779,6 +9819,17 @@ : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; + // When the RHS is not invariant, we do not know the end bound of the loop and + // cannot calculate the ExactBECount needed by ExitLimit. However, we can + // calculate the MaxBECount, given the start, stride and max value for the end + // bound of the loop (RHS), and the fact that IV does not overflow (which is + // checked above). + if (!isLoopInvariant(RHS, L)) { + const SCEV *MaxBECount = computeMaxBECount( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); + return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, + false /*MaxOrZero*/, Predicates); + } // If the backedge is taken at least once, then it will be taken // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start // is the LHS value of the less-than comparison the first time it is evaluated @@ -9811,37 +9862,8 @@ MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { - // Calculate the maximum backedge count based on the range of values - // permitted by Start, End, and Stride. - APInt MinStart = IsSigned ? getSignedRangeMin(Start) - : getUnsignedRangeMin(Start); - - unsigned BitWidth = getTypeSizeInBits(LHS->getType()); - - APInt StrideForMaxBECount; - - if (PositiveStride) - StrideForMaxBECount = - IsSigned ? getSignedRangeMin(Stride) - : getUnsignedRangeMin(Stride); - else - // Using a stride of 1 is safe when computing max backedge taken count for - // a loop with unknown stride. - StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); - - APInt Limit = - IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1) - : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1); - - // Although End can be a MAX expression we estimate MaxEnd considering only - // the case End = RHS. This is safe because in the other case (End - Start) - // is zero, leading to a zero maximum backedge taken count. - APInt MaxEnd = - IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit) - : APIntOps::umin(getUnsignedRangeMax(RHS), Limit); - - MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), - getConstant(StrideForMaxBECount), false); + MaxBECount = computeMaxBECount(Start, Stride, RHS, + getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(MaxBECount) && Index: llvm/trunk/test/Analysis/ScalarEvolution/max-trip-count.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/max-trip-count.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -288,3 +288,146 @@ exit: ret i32 0 } + +; The end bound of the loop can change between iterations, so the exact trip +; count is unknown, but SCEV can calculate the max trip count. +define void @changing_end_bound(i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 2147483646 +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, 1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; Similar test as above, but unknown start value. +; Also, there's no nsw on the iv.next, but SCEV knows +; the termination condition is LT, so the IV cannot wrap. +define void @changing_end_bound2(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is -1 +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add i32 %iv, 1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; changing end bound and greater than one stride +define void @changing_end_bound3(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound3 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 1073741823 +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, 4 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; same as above test, but the IV can wrap around. +; so the max backedge taken count is unpredictable. +define void @changing_end_bound4(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound4 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add i32 %iv, 4 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; unknown stride. Since it's not knownPositive, we do not estimate the max +; backedge taken count. +define void @changing_end_bound5(i32 %stride, i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound5 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, %stride + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; negative stride value +define void @changing_end_bound6(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound6 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, -1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} Index: llvm/trunk/test/Transforms/LoopSimplify/preserve-scev.ll =================================================================== --- llvm/trunk/test/Transforms/LoopSimplify/preserve-scev.ll +++ llvm/trunk/test/Transforms/LoopSimplify/preserve-scev.ll @@ -13,7 +13,7 @@ ; CHECK: %[[PHI:.*]] = phi i32 [ 0, %entry ], [ %{{.*}}, %if.then5 ], [ %[[PHI]], %if.end ] ; CHECK-LABEL: Determining loop execution counts for: @test ; CHECK: Loop %for.body18: Unpredictable backedge-taken count. -; CHECK: Loop %for.body18: Unpredictable max backedge-taken count. +; CHECK: Loop %for.body18: max backedge-taken count is 2147483646 ; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count. ; CHECK: Loop %for.cond: Unpredictable backedge-taken count. ; CHECK: Loop %for.cond: Unpredictable max backedge-taken count. @@ -25,7 +25,7 @@ ; CHECK: phi i32 [ %{{.*}}, %if.then5 ], [ 0, %entry ] ; CHECK-LABEL: Determining loop execution counts for: @test ; CHECK: Loop %for.body18: Unpredictable backedge-taken count. -; CHECK: Loop %for.body18: Unpredictable max backedge-taken count. +; CHECK: Loop %for.body18: max backedge-taken count is 2147483646 ; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count. ; CHECK: Loop %for.cond: Unpredictable backedge-taken count. ; CHECK: Loop %for.cond: max backedge-taken count is -2147483647