diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1811,7 +1811,7 @@ /// SCEV predicates in order to return an exact answer. ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool ControlsExit, - bool AllowPredicates = false); + bool AllowPredicates, ICmpInst *OrigCond); ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8108,7 +8108,7 @@ case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Pred == ICmpInst::ICMP_SLT; ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, - AllowPredicates); + AllowPredicates, ExitCond); if (EL.hasAnyInfo()) return EL; break; } @@ -11573,7 +11573,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit, bool AllowPredicates) { + bool ControlsExit, bool AllowPredicates, + ICmpInst *OrigCond) { SmallPtrSet Predicates; const SCEVAddRecExpr *IV = dyn_cast(LHS); @@ -11765,8 +11766,29 @@ const SCEV *BECount = nullptr; auto *StartMinusStride = getMinusSCEV(OrigStart, Stride); // Can we prove (max(RHS,Start) > Start - Stride? - if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) && - isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) { + if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, OrigRHS)) { + auto MaySubOverflow = [&]() { + // Start - Stride < Start implies no overflow. + if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start)) + return false; + // Check if we have an IR instruction feeding into the branch + // "add nsw IVMinusStride, Stride" + if (getSCEV(OrigCond->getOperand(0)) == IV) { + if (auto *Add = dyn_cast(OrigCond->getOperand(0))) { + if (IsSigned ? Add->hasNoSignedWrap() : Add->hasNoUnsignedWrap()) { + if (getSCEV(Add->getOperand(1)) == Stride) + return false; + } + } + if (auto *GEP = dyn_cast(OrigCond->getOperand(0))) { + if (!IsSigned && GEP->isInBounds()) { + if (getSCEV(GEP->getOperand(0)) == getMinusSCEV(IV, Stride)) + return false; + } + } + } + return true; + }; // In this case, we can use a refined formula for computing backedge taken // count. The general formula remains: // "End-Start /uceiling Stride" where "End = max(RHS,Start)" @@ -11785,10 +11807,10 @@ // "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to // "((RHS - (Start - Stride) - 1) /u Stride". // Our preconditions trivially imply no overflow in that form. - const SCEV *MinusOne = getMinusOne(Stride->getType()); - const SCEV *Numerator = - getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride); - if (!isa(Numerator)) { + if (!MaySubOverflow()) { + const SCEV *MinusOne = getMinusOne(Stride->getType()); + const SCEV *Numerator = + getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride)); BECount = getUDivExpr(Numerator, Stride); } } diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll @@ -122,6 +122,37 @@ ret void } +define void @pointer_iv_nowrap_guard(i32* %startptr, i32* %endptr) local_unnamed_addr { +; CHECK-LABEL: 'pointer_iv_nowrap_guard' +; CHECK-NEXT: Classifying expressions for: @pointer_iv_nowrap_guard +; CHECK-NEXT: %init = getelementptr inbounds i32, i32* %startptr, i64 2000 +; CHECK-NEXT: --> (8000 + %startptr) U: [8000,0) S: [8000,0) +; CHECK-NEXT: %iv = phi i32* [ %init, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {(8000 + %startptr),+,4}<%loop> U: [8000,0) S: [8000,0) Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr inbounds i32, i32* %iv, i64 1 +; CHECK-NEXT: --> {(8004 + %startptr),+,4}<%loop> U: [8004,0) S: [8004,0) Exits: (8004 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @pointer_iv_nowrap_guard +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4) +; CHECK-NEXT: Loop %loop: max backedge-taken count is 4611686018427385902 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +; +entry: + %init = getelementptr inbounds i32, i32* %startptr, i64 2000 + %cmp2 = icmp ult i32* %init, %endptr + br i1 %cmp2, label %loop, label %end + +loop: + %iv = phi i32* [ %init, %entry ], [ %iv.next, %loop ] + %iv.next = getelementptr inbounds i32, i32* %iv, i64 1 + %ec = icmp uge i32* %iv.next, %endptr + br i1 %ec, label %end, label %loop + +end: + ret void +} + define i32 @dummy(i32 %start, i32* %p, i32* %q) { entry: ret i32 0 diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll @@ -5,7 +5,7 @@ ; that this is not an infinite loop with side effects. ; CHECK-LABEL: Determining loop execution counts for: @foo1 -; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s) +; CHECK: backedge-taken count is ((-1 + %n) /u %s) ; We should have a conservative estimate for the max backedge taken count for ; loops with unknown stride. diff --git a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll --- a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll +++ b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll @@ -17,24 +17,22 @@ ; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], 4 -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[M]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[M]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[M]], [[INDVAR]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP7]], 2 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[M]], [[INDVAR]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP6]], 2 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP5]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP5]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]]