diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1719,7 +1719,7 @@ /// SCEV predicates in order to return an exact answer. ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool ControlsExit, - bool AllowPredicates = false); + bool AllowPredicates, ICmpInst *OrigCond); ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8115,7 +8115,7 @@ case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Pred == ICmpInst::ICMP_SLT; ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, - AllowPredicates); + AllowPredicates, ExitCond); if (EL.hasAnyInfo()) return EL; break; } @@ -11589,7 +11589,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit, bool AllowPredicates) { + bool ControlsExit, bool AllowPredicates, + ICmpInst *OrigCond) { SmallPtrSet Predicates; const SCEVAddRecExpr *IV = dyn_cast(LHS); @@ -11822,8 +11823,29 @@ const SCEV *BECount = nullptr; auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride); // Can we prove (max(RHS,Start) > Start - Stride? - if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) && - isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) { + if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) { + auto MaySubOverflow = [&]() { + // Start - Stride < Start implies no overflow. + if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart)) + return false; + // Check if we have an IR instruction feeding into the branch + // "add nsw IVMinusStride, Stride" + if (getSCEV(OrigCond->getOperand(0)) == IV) { + if (auto *Add = dyn_cast(OrigCond->getOperand(0))) { + if (IsSigned ? Add->hasNoSignedWrap() : Add->hasNoUnsignedWrap()) { + if (getSCEV(Add->getOperand(1)) == Stride) + return false; + } + } + if (auto *GEP = dyn_cast(OrigCond->getOperand(0))) { + if (!IsSigned && GEP->isInBounds()) { + if (getSCEV(GEP->getOperand(0)) == getMinusSCEV(IV, Stride)) + return false; + } + } + } + return true; + }; // In this case, we can use a refined formula for computing backedge taken // count. The general formula remains: // "End-Start /uceiling Stride" where "End = max(RHS,Start)" @@ -11842,10 +11864,12 @@ // "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to // "((RHS - (Start - Stride) - 1) /u Stride". // Our preconditions trivially imply no overflow in that form. - const SCEV *MinusOne = getMinusOne(Stride->getType()); - const SCEV *Numerator = - getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride)); - BECount = getUDivExpr(Numerator, Stride); + if (!MaySubOverflow()) { + const SCEV *MinusOne = getMinusOne(Stride->getType()); + const SCEV *Numerator = + getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride)); + BECount = getUDivExpr(Numerator, Stride); + } } const SCEV *BECountIfBackedgeTaken = nullptr; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll @@ -4,7 +4,7 @@ ; that this is not an infinite loop with side effects. ; CHECK-LABEL: Determining loop execution counts for: @foo1 -; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s) +; CHECK: backedge-taken count is ((-1 + %n) /u %s) ; We should have a conservative estimate for the max backedge taken count for ; loops with unknown stride. diff --git a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll --- a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll +++ b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll @@ -17,24 +17,22 @@ ; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], 4 -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[M]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[M]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[M]], [[INDVAR]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP7]], 2 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[M]], [[INDVAR]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP6]], 2 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP5]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP5]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]]