Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1435,8 +1435,9 @@ } static bool optimizeLoopExitWithUnknownExitCount( - const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, - bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, + const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, + const SCEV *CurrMaxIter, const SCEV *MaxIter, bool SkipLastIter, + ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) { assert( (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) && @@ -1484,11 +1485,17 @@ // conditions if there is another condition that gives the very same exit // count. bool TryMoreOptimisticLastIter = false; - if (!SkipLastIter && LeafConditions.size() <= MaxJoinedICmpsToAnalyze && - SE->getExitCount(L, ExitingBB, - ScalarEvolution::ExitCountKind::SymbolicMaximum) == - MaxIter) - TryMoreOptimisticLastIter = true; + if (!SkipLastIter && LeafConditions.size() <= MaxJoinedICmpsToAnalyze) { + const SCEV *CurrBlockMaxExit = SE->getExitCount( + L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum); + if (!isa(CurrBlockMaxExit)) { + const SCEV *MaxUpToCurrentBlock = + isa(CurrMaxIter) + ? CurrBlockMaxExit + : SE->getUMinFromMismatchedTypes(CurrMaxIter, CurrBlockMaxExit); + TryMoreOptimisticLastIter |= MaxUpToCurrentBlock == MaxIter; + } + } bool Changed = false; for (size_t i = 0; i < LeafConditions.size(); ++i) { @@ -1503,13 +1510,18 @@ // conditions may skip it. auto *ExitMax = EL.SymbolicMaxNotTaken; if (!isa(ExitMax)) { + const SCEV *MaxUpToCurrentICmp = + isa(CurrMaxIter) + ? ExitMax + : SE->getUMinFromMismatchedTypes(CurrMaxIter, ExitMax); // They could be of different types (specifically this happens after // IV widening). - auto *WiderType = - SE->getWiderType(ExitMax->getType(), MaxIter->getType()); - auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); + auto *WiderType = SE->getWiderType(MaxUpToCurrentICmp->getType(), + MaxIter->getType()); + auto *WideMaxUpToCurrentICmp = + SE->getNoopOrZeroExtend(MaxUpToCurrentICmp, WiderType); auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); - if (WideExitMax == WideMaxIter) { + if (WideMaxUpToCurrentICmp == WideMaxIter) { OptimisticSkipLastIter = true; break; } @@ -1758,9 +1770,9 @@ // will remain the same within iteration space? auto *BI = cast(ExitingBB->getTerminator()); auto OptimizeCond = [&](bool SkipLastIter) { - return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB, - MaxBECount, SkipLastIter, - SE, Rewriter, DeadInsts); + return optimizeLoopExitWithUnknownExitCount( + L, BI, ExitingBB, CurrMaxExit, MaxBECount, SkipLastIter, SE, + Rewriter, DeadInsts); }; // TODO: We might have proved that we can skip the last iteration for Index: llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll +++ llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll @@ -66,6 +66,8 @@ ; WIDENING_ON-LABEL: @test_02( ; WIDENING_ON-NEXT: bb: ; WIDENING_ON-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 +; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 +; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 ; WIDENING_ON-NEXT: br label [[LOOP:%.*]] ; WIDENING_ON: loop: ; WIDENING_ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[BB:%.*]] ] @@ -73,12 +75,10 @@ ; WIDENING_ON-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[CANONICAL_IV]], 65635 ; WIDENING_ON-NEXT: br i1 [[EXITCOND]], label [[CHECKED:%.*]], label [[FAILED:%.*]] ; WIDENING_ON: checked: -; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[NOT_ZERO:%.*]] = icmp ne i64 [[INDVARS_IV]], 0 -; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 -; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE]] +; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE_FIRST_ITER:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE_FIRST_ITER]] ; WIDENING_ON-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_ON: backedge: ; WIDENING_ON-NEXT: [[CANONICAL_IV_NEXT]] = add nuw nsw i32 [[CANONICAL_IV]], 1 @@ -90,17 +90,18 @@ ; ; WIDENING_OFF-LABEL: @test_02( ; WIDENING_OFF-NEXT: bb: +; WIDENING_OFF-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; WIDENING_OFF-NEXT: br label [[LOOP:%.*]] ; WIDENING_OFF: loop: ; WIDENING_OFF-NEXT: [[CANONICAL_IV:%.*]] = phi i32 [ [[CANONICAL_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[BB:%.*]] ] -; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE]] ], [ [[START:%.*]], [[BB]] ] +; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE]] ], [ [[START]], [[BB]] ] ; WIDENING_OFF-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[CANONICAL_IV]], 65635 ; WIDENING_OFF-NEXT: br i1 [[EXITCOND]], label [[CHECKED:%.*]], label [[FAILED:%.*]] ; WIDENING_OFF: checked: ; WIDENING_OFF-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 ; WIDENING_OFF-NEXT: [[NOT_ZERO:%.*]] = icmp ne i32 [[IV]], 0 -; WIDENING_OFF-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LIMIT:%.*]] -; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK]] +; WIDENING_OFF-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LIMIT:%.*]] +; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_FIRST_ITER]] ; WIDENING_OFF-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_OFF: backedge: ; WIDENING_OFF-NEXT: [[ZEXT:%.*]] = zext i32 [[IV_NEXT]] to i64