Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1432,8 +1432,9 @@ } static bool optimizeLoopExitWithUnknownExitCount( - const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, - bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, + const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, + const SCEV *CurrMaxIter, const SCEV *MaxIter, bool SkipLastIter, + ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) { assert( (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) && @@ -1481,25 +1482,35 @@ // same exit count. For all other icmp's, we could use one less iteration, // because their value on the last iteration doesn't really matter. SmallPtrSet ICmpsFailingOnLastIter; - if (!SkipLastIter && LeafConditions.size() > 1 && - SE->getExitCount(L, ExitingBB, - ScalarEvolution::ExitCountKind::SymbolicMaximum) == - MaxIter) - for (auto *ICmp : LeafConditions) { - auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted, - /*ControlsExit*/ false); - auto *ExitMax = EL.SymbolicMaxNotTaken; - if (isa(ExitMax)) - continue; - // They could be of different types (specifically this happens after - // IV widening). - auto *WiderType = - SE->getWiderType(ExitMax->getType(), MaxIter->getType()); - auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); - auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); - if (WideExitMax == WideMaxIter) - ICmpsFailingOnLastIter.insert(ICmp); - } + if (!SkipLastIter && LeafConditions.size() > 1) { + auto *CurrBBExitCount = SE->getExitCount( + L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum); + auto *ExitCountUpToCurrBB = + isa(CurrMaxIter) + ? CurrBBExitCount + : SE->getUMinFromMismatchedTypes(CurrBBExitCount, CurrMaxIter); + if (ExitCountUpToCurrBB == MaxIter) + for (auto *ICmp : LeafConditions) { + auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted, + /*ControlsExit*/ false); + auto *ExitMax = EL.SymbolicMaxNotTaken; + if (isa(ExitMax)) + continue; + auto *MaxUpToCurrentICmp = + isa(CurrMaxIter) + ? ExitMax + : SE->getUMinFromMismatchedTypes(CurrMaxIter, ExitMax); + // They could be of different types (specifically this happens after + // IV widening). + auto *WiderType = + SE->getWiderType(ExitMax->getType(), MaxUpToCurrentICmp->getType()); + auto *WideMaxUpToCurrentICmp = + SE->getNoopOrZeroExtend(MaxUpToCurrentICmp, WiderType); + auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); + if (WideMaxUpToCurrentICmp == WideMaxIter) + ICmpsFailingOnLastIter.insert(ICmp); + } + } bool Changed = false; for (auto *OldCond : LeafConditions) { @@ -1757,9 +1768,9 @@ // will remain the same within iteration space? auto *BI = cast(ExitingBB->getTerminator()); auto OptimizeCond = [&](bool SkipLastIter) { - return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB, - MaxBECount, SkipLastIter, - SE, Rewriter, DeadInsts); + return optimizeLoopExitWithUnknownExitCount( + L, BI, ExitingBB, CurrMaxExit, MaxBECount, SkipLastIter, SE, + Rewriter, DeadInsts); }; // TODO: We might have proved that we can skip the last iteration for Index: llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll +++ llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll @@ -66,6 +66,9 @@ ; WIDENING_ON-LABEL: @test_02( ; WIDENING_ON-NEXT: bb: ; WIDENING_ON-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 +; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 +; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 +; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE_FIRST_ITER:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] ; WIDENING_ON-NEXT: br label [[LOOP:%.*]] ; WIDENING_ON: loop: ; WIDENING_ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[BB:%.*]] ] @@ -73,12 +76,9 @@ ; WIDENING_ON-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[CANONICAL_IV]], 65635 ; WIDENING_ON-NEXT: br i1 [[EXITCOND]], label [[CHECKED:%.*]], label [[FAILED:%.*]] ; WIDENING_ON: checked: -; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[NOT_ZERO:%.*]] = icmp ne i64 [[INDVARS_IV]], 0 -; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 -; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE]] +; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE_FIRST_ITER]] ; WIDENING_ON-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_ON: backedge: ; WIDENING_ON-NEXT: [[CANONICAL_IV_NEXT]] = add nuw nsw i32 [[CANONICAL_IV]], 1 @@ -90,17 +90,18 @@ ; ; WIDENING_OFF-LABEL: @test_02( ; WIDENING_OFF-NEXT: bb: +; WIDENING_OFF-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 +; WIDENING_OFF-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LIMIT:%.*]] ; WIDENING_OFF-NEXT: br label [[LOOP:%.*]] ; WIDENING_OFF: loop: ; WIDENING_OFF-NEXT: [[CANONICAL_IV:%.*]] = phi i32 [ [[CANONICAL_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[BB:%.*]] ] -; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE]] ], [ [[START:%.*]], [[BB]] ] +; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE]] ], [ [[START]], [[BB]] ] ; WIDENING_OFF-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[CANONICAL_IV]], 65635 ; WIDENING_OFF-NEXT: br i1 [[EXITCOND]], label [[CHECKED:%.*]], label [[FAILED:%.*]] ; WIDENING_OFF: checked: ; WIDENING_OFF-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 ; WIDENING_OFF-NEXT: [[NOT_ZERO:%.*]] = icmp ne i32 [[IV]], 0 -; WIDENING_OFF-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LIMIT:%.*]] -; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK]] +; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_FIRST_ITER]] ; WIDENING_OFF-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_OFF: backedge: ; WIDENING_OFF-NEXT: [[ZEXT:%.*]] = zext i32 [[IV_NEXT]] to i64