Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1737,6 +1737,7 @@ ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates = false); +public: /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond. /// @@ -1751,6 +1752,7 @@ bool ExitIfTrue, bool ControlsExit, bool AllowPredicates = false); +private: /// Return a symbolic upper bound for the backedge taken count of the loop. /// This is more general than getConstantMaxBackedgeTakenCount as it returns /// an arbitrary expression as opposed to only constants. Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1468,11 +1468,47 @@ LeafConditions.push_back(Curr); } while (!Worklist.empty()); + // If the number of conditions is reasonably small, and this block is the last + // block without SkipLastIter, we can try to apply SkipLastIter to some of its + // conditions if there is another condition that gives the very same exit + // count. + bool TryMoreOptimisticLastIter = false; + if (!SkipLastIter && LeafConditions.size() < 8 && + SE->getExitCount(L, ExitingBB, + ScalarEvolution::ExitCountKind::SymbolicMaximum) == + MaxIter) + TryMoreOptimisticLastIter = true; + bool Changed = false; - for (auto *OldCond : LeafConditions) + for (size_t i = 0; i < LeafConditions.size(); ++i) { + auto *OldCond = LeafConditions[i]; + bool OptimisticSkipLastIter = SkipLastIter; + if (TryMoreOptimisticLastIter) { + for (size_t j = 0; j < LeafConditions.size(); j++) + if (i != j) { + auto EL = SE->computeExitLimitFromCond(L, LeafConditions[j], Inverted, + /*ControlsExit*/ false); + // If something is going to exit on the last iteration, all other leaf + // conditions may skip it. + auto *ExitMax = EL.SymbolicMaxNotTaken; + if (!isa(ExitMax)) { + // They could be of different types (specifically this happens after + // IV widening). + auto *WiderType = + SE->getWiderType(ExitMax->getType(), MaxIter->getType()); + auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); + auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); + if (WideExitMax == WideMaxIter) { + OptimisticSkipLastIter = true; + break; + } + } + } + } + if (auto Replaced = - createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted, - SkipLastIter, SE, Rewriter)) { + createReplacement(OldCond, L, ExitingBB, MaxIter, + Inverted, OptimisticSkipLastIter, SE, Rewriter)) { Changed = true; auto *NewCond = *Replaced; if (auto *NCI = dyn_cast(NewCond)) { @@ -1485,6 +1521,7 @@ OldCond->replaceAllUsesWith(NewCond); DeadInsts.push_back(OldCond); } + } return Changed; } Index: llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll +++ llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll @@ -11,15 +11,15 @@ ; WIDENING_ON-NEXT: [[VAR:%.*]] = load atomic i32, ptr [[P1:%.*]] unordered, align 8 ; WIDENING_ON-NEXT: [[VAR1:%.*]] = load atomic i32, ptr [[P2:%.*]] unordered, align 8 ; WIDENING_ON-NEXT: [[TMP0:%.*]] = zext i32 [[VAR]] to i64 +; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 +; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[VAR1]] to i64 ; WIDENING_ON-NEXT: br label [[BB2:%.*]] ; WIDENING_ON: bb2: ; WIDENING_ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB8:%.*]] ], [ [[TMP0]], [[BB:%.*]] ] -; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[VAR5:%.*]] = icmp ne i64 [[INDVARS_IV]], 0 -; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[VAR1]] to i64 -; WIDENING_ON-NEXT: [[VAR6_WIDE:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; WIDENING_ON-NEXT: [[VAR7:%.*]] = and i1 [[VAR5]], [[VAR6_WIDE]] +; WIDENING_ON-NEXT: [[VAR6_WIDE_FIRST_ITER:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; WIDENING_ON-NEXT: [[VAR7:%.*]] = and i1 [[VAR5]], [[VAR6_WIDE_FIRST_ITER]] ; WIDENING_ON-NEXT: br i1 [[VAR7]], label [[BB8]], label [[BB11:%.*]] ; WIDENING_ON: bb8: ; WIDENING_ON-NEXT: br label [[BB2]] @@ -30,13 +30,14 @@ ; WIDENING_OFF-NEXT: bb: ; WIDENING_OFF-NEXT: [[VAR:%.*]] = load atomic i32, ptr [[P1:%.*]] unordered, align 8 ; WIDENING_OFF-NEXT: [[VAR1:%.*]] = load atomic i32, ptr [[P2:%.*]] unordered, align 8 +; WIDENING_OFF-NEXT: [[TMP0:%.*]] = add i32 [[VAR]], -1 ; WIDENING_OFF-NEXT: br label [[BB2:%.*]] ; WIDENING_OFF: bb2: ; WIDENING_OFF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR4:%.*]], [[BB8:%.*]] ], [ [[VAR]], [[BB:%.*]] ] ; WIDENING_OFF-NEXT: [[VAR4]] = add i32 [[VAR3]], -1 ; WIDENING_OFF-NEXT: [[VAR5:%.*]] = icmp ne i32 [[VAR3]], 0 -; WIDENING_OFF-NEXT: [[VAR6:%.*]] = icmp ult i32 [[VAR4]], [[VAR1]] -; WIDENING_OFF-NEXT: [[VAR7:%.*]] = and i1 [[VAR5]], [[VAR6]] +; WIDENING_OFF-NEXT: [[VAR6_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[VAR1]] +; WIDENING_OFF-NEXT: [[VAR7:%.*]] = and i1 [[VAR5]], [[VAR6_FIRST_ITER]] ; WIDENING_OFF-NEXT: br i1 [[VAR7]], label [[BB8]], label [[BB11:%.*]] ; WIDENING_OFF: bb8: ; WIDENING_OFF-NEXT: [[VAR9:%.*]] = zext i32 [[VAR4]] to i64 Index: llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll +++ llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll @@ -536,20 +536,20 @@ ret i32 -2 } -; TODO: This test is equivalent to @test_simple_case, with only difference -; that both checks are merged together into one 'and' check. This -; should not prevent turning the range check into invariant. -; https://alive2.llvm.org/ce/z/G-2ERB +; This test is equivalent to @test_simple_case, with only difference +; that both checks are merged together into one 'and' check. This +; should not prevent turning the range check into invariant. +; https://alive2.llvm.org/ce/z/G-2ERB define i32 @test_and_conditions(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK]] +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK_FIRST_ITER]] ; CHECK-NEXT: br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -584,18 +584,18 @@ ret i32 -3 } -; TODO: Same as test_and_conditions, but swapped exit block branches, -; and condition is expressed as OR. Still optimizable. +; Same as test_and_conditions, but swapped exit block branches, +; and condition is expressed as OR. Still optimizable. define i32 @test_and_conditions_inverse(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_inverse( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED]] +; CHECK-NEXT: [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED_FIRST_ITER]] ; CHECK-NEXT: br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -752,17 +752,17 @@ ret i32 -2 } -; TODO: Same as test_and_conditions, but with logical AND. +; Same as test_and_conditions, but with logical AND. define i32 @test_and_conditions_logical_and(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_logical_and( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK]], i1 false +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK_FIRST_ITER]], i1 false ; CHECK-NEXT: br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -797,17 +797,17 @@ ret i32 -3 } -; TODO: Same as test_and_conditions_inverse, but with logical OR. +; Same as test_and_conditions_inverse, but with logical OR. define i32 @test_and_conditions_inverse_logical_or(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_inverse_logical_or( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED]] +; CHECK-NEXT: [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED_FIRST_ITER]] ; CHECK-NEXT: br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1