Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -135,6 +135,11 @@ AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext")); +static cl::opt +MaxJoinedICmpsToAnalyze("indvars-max-joined-icmps", cl::Hidden, cl::init(8), + cl::desc("Max amount of ICmp's joined via AND/OR to " + "apply complex analyzes for.")); + namespace { class IndVarSimplify { @@ -1465,11 +1470,47 @@ LeafConditions.push_back(ICmp); } while (!Worklist.empty()); + // If the number of conditions is reasonably small, and this block is the last + // block without SkipLastIter, we can try to apply SkipLastIter to some of its + // conditions if there is another condition that gives the very same exit + // count. + bool TryMoreOptimisticLastIter = false; + if (!SkipLastIter && LeafConditions.size() <= MaxJoinedICmpsToAnalyze && + SE->getExitCount(L, ExitingBB, + ScalarEvolution::ExitCountKind::SymbolicMaximum) == + MaxIter) + TryMoreOptimisticLastIter = true; + bool Changed = false; - for (auto *OldCond : LeafConditions) + for (size_t i = 0; i < LeafConditions.size(); ++i) { + auto *OldCond = LeafConditions[i]; + bool OptimisticSkipLastIter = SkipLastIter; + if (TryMoreOptimisticLastIter) { + for (size_t j = 0; j < LeafConditions.size(); j++) + if (i != j) { + auto EL = SE->computeExitLimitFromCond(L, LeafConditions[j], Inverted, + /*ControlsExit*/ false); + // If something is going to exit on the last iteration, all other leaf + // conditions may skip it. + auto *ExitMax = EL.SymbolicMaxNotTaken; + if (!isa(ExitMax)) { + // They could be of different types (specifically this happens after + // IV widening). + auto *WiderType = + SE->getWiderType(ExitMax->getType(), MaxIter->getType()); + auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); + auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); + if (WideExitMax == WideMaxIter) { + OptimisticSkipLastIter = true; + break; + } + } + } + } + if (auto Replaced = createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted, - SkipLastIter, SE, Rewriter)) { + OptimisticSkipLastIter, SE, Rewriter)) { Changed = true; auto *NewCond = *Replaced; if (auto *NCI = dyn_cast(NewCond)) { @@ -1482,6 +1523,7 @@ OldCond->replaceAllUsesWith(NewCond); DeadInsts.push_back(OldCond); } + } return Changed; } Index: llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll +++ llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll @@ -9,15 +9,15 @@ ; WIDENING_ON-LABEL: @test_01( ; WIDENING_ON-NEXT: bb: ; WIDENING_ON-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 +; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 +; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 ; WIDENING_ON-NEXT: br label [[LOOP:%.*]] ; WIDENING_ON: loop: ; WIDENING_ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[BB:%.*]] ] -; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[NOT_ZERO:%.*]] = icmp ne i64 [[INDVARS_IV]], 0 -; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 -; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE]] +; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE_FIRST_ITER:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE_FIRST_ITER]] ; WIDENING_ON-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_ON: backedge: ; WIDENING_ON-NEXT: br label [[LOOP]] @@ -26,13 +26,14 @@ ; ; WIDENING_OFF-LABEL: @test_01( ; WIDENING_OFF-NEXT: bb: +; WIDENING_OFF-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; WIDENING_OFF-NEXT: br label [[LOOP:%.*]] ; WIDENING_OFF: loop: -; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[START:%.*]], [[BB:%.*]] ] +; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[START]], [[BB:%.*]] ] ; WIDENING_OFF-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 ; WIDENING_OFF-NEXT: [[NOT_ZERO:%.*]] = icmp ne i32 [[IV]], 0 -; WIDENING_OFF-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LIMIT:%.*]] -; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK]] +; WIDENING_OFF-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LIMIT:%.*]] +; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_FIRST_ITER]] ; WIDENING_OFF-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_OFF: backedge: ; WIDENING_OFF-NEXT: [[ZEXT:%.*]] = zext i32 [[IV_NEXT]] to i64 Index: llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll +++ llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll @@ -536,20 +536,20 @@ ret i32 -2 } -; TODO: This test is equivalent to @test_simple_case, with only difference -; that both checks are merged together into one 'and' check. This -; should not prevent turning the range check into invariant. -; https://alive2.llvm.org/ce/z/G-2ERB +; This test is equivalent to @test_simple_case, with only difference +; that both checks are merged together into one 'and' check. This +; should not prevent turning the range check into invariant. +; https://alive2.llvm.org/ce/z/G-2ERB define i32 @test_and_conditions(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK]] +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK_FIRST_ITER]] ; CHECK-NEXT: br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -584,18 +584,18 @@ ret i32 -3 } -; TODO: Same as test_and_conditions, but swapped exit block branches, -; and condition is expressed as OR. Still optimizable. +; Same as test_and_conditions, but swapped exit block branches, +; and condition is expressed as OR. Still optimizable. define i32 @test_and_conditions_inverse(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_inverse( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED]] +; CHECK-NEXT: [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED_FIRST_ITER]] ; CHECK-NEXT: br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -752,17 +752,17 @@ ret i32 -2 } -; TODO: Same as test_and_conditions, but with logical AND. +; Same as test_and_conditions, but with logical AND. define i32 @test_and_conditions_logical_and(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_logical_and( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK]], i1 false +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK_FIRST_ITER]], i1 false ; CHECK-NEXT: br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -797,17 +797,17 @@ ret i32 -3 } -; TODO: Same as test_and_conditions_inverse, but with logical OR. +; Same as test_and_conditions_inverse, but with logical OR. define i32 @test_and_conditions_inverse_logical_or(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_inverse_logical_or( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED]] +; CHECK-NEXT: [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED_FIRST_ITER]] ; CHECK-NEXT: br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1