diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -11760,6 +11760,20 @@ } } + // Fast path: if the predicates match and LHS - FoundLHS == RHS - FoundRHS + // (same non-pointer type), treat the condition as implied, e.g. + // FoundLHS < FoundRHS => (FoundLHS + Offset) < (FoundRHS + Offset). + // NOTE(review): for relational predicates this only holds when adding + // Offset cannot wrap; no no-wrap check is visible in this hunk. Confirm + // soundness (e.g. restrict to equality or prove no overflow). + if ((FoundPred == Pred) && (FoundLHS->getType() == LHS->getType()) && + (!FoundLHS->getType()->isPointerTy() && !LHS->getType()->isPointerTy())) { + auto *FoundLHSOffset = getMinusSCEV(LHS, FoundLHS); + auto *FoundRHSOffset = getMinusSCEV(RHS, FoundRHS); + if (FoundLHSOffset == FoundRHSOffset) + return true; + } + // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI); diff --git a/llvm/test/Analysis/ScalarEvolution/add-like-or.ll b/llvm/test/Analysis/ScalarEvolution/add-like-or.ll --- a/llvm/test/Analysis/ScalarEvolution/add-like-or.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-like-or.ll @@ -27,14 +27,14 @@ ; CHECK-NEXT: %i4 = or i64 1, %i3 ; CHECK-NEXT: --> (1 + (16 * (%arg /u 16))) U: [1,-14) S: [-9223372036854775807,9223372036854775794) ; CHECK-NEXT: %i7 = phi i64 [ %i4, %bb ], [ %i8, %bb6 ] -; CHECK-NEXT: --> {(1 + (16 * (%arg /u 16))),+,1}<%bb6> U: full-set S: full-set Exits: ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16)))) LoopDispositions: { %bb6: Computable } +; CHECK-NEXT: --> {(1 + (16 * (%arg /u 16))),+,1}<%bb6> U: full-set S: full-set Exits: (sext i32 %i to i64) LoopDispositions: { %bb6: Computable } ; CHECK-NEXT: %i8 = add i64 %i7, 1 -; CHECK-NEXT: --> {(2 + (16 * (%arg /u 16))),+,1}<%bb6> U: full-set S: full-set Exits: (1 + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))))) LoopDispositions: { %bb6: Computable } +; CHECK-NEXT: --> {(2 + (16 * (%arg /u 16))),+,1}<%bb6> U: full-set S: 
full-set Exits: (1 + (sext i32 %i to i64)) LoopDispositions: { %bb6: Computable } ; CHECK-NEXT: Determining loop execution counts for: @mask-high -; CHECK-NEXT: Loop %bb6: backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))))) +; CHECK-NEXT: Loop %bb6: backedge-taken count is (-1 + (sext i32 %i to i64) + (-16 * (%arg /u 16))) ; CHECK-NEXT: Loop %bb6: constant max backedge-taken count is -9223372034707292162 -; CHECK-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))))) -; CHECK-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))))) +; CHECK-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (sext i32 %i to i64) + (-16 * (%arg /u 16))) +; CHECK-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (sext i32 %i to i64) + (-16 * (%arg /u 16))) ; CHECK-NEXT: Predicates: ; CHECK: Loop %bb6: Trip multiple is 1 ; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-offset.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-offset.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-offset.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-offset.ll @@ -7,10 +7,10 @@ define dso_local void @cal_sparseMV(i32 noundef %nCells, ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %b, ptr nocapture noundef readonly %values, i32 noundef %max_row_length, ptr nocapture noundef readonly %col_index) local_unnamed_addr { ; CHECK-LABEL: 'cal_sparseMV' ; CHECK: Determining loop execution counts for: @cal_sparseMV -; CHECK: Loop %for.body4: backedge-taken count is ({-1,+,(-1 * %max_row_length)}<%for.cond1.preheader> + ({1,+,%max_row_length}<%for.cond1.preheader> smax {%max_row_length,+,%max_row_length}<%for.cond1.preheader>)) +; CHECK: Loop %for.body4: backedge-taken count is (-1 + %max_row_length) ; 
CHECK: Loop %for.body4: constant max backedge-taken count is -1 -; CHECK: Loop %for.body4: symbolic max backedge-taken count is ({-1,+,(-1 * %max_row_length)}<%for.cond1.preheader> + ({1,+,%max_row_length}<%for.cond1.preheader> smax {%max_row_length,+,%max_row_length}<%for.cond1.preheader>)) -; CHECK: Loop %for.body4: Predicated backedge-taken count is ({-1,+,(-1 * %max_row_length)}<%for.cond1.preheader> + ({1,+,%max_row_length}<%for.cond1.preheader> smax {%max_row_length,+,%max_row_length}<%for.cond1.preheader>)) +; CHECK: Loop %for.body4: symbolic max backedge-taken count is (-1 + %max_row_length) +; CHECK: Loop %for.body4: Predicated backedge-taken count is (-1 + %max_row_length) ; entry: %cmp28 = icmp sgt i32 %nCells, 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll --- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll @@ -102,12 +102,7 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB2_1: @ %for.body.preheader -; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: cmp r2, #8 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, #8 -; CHECK-NEXT: subs r3, r2, r3 -; CHECK-NEXT: add.w r12, r3, #7 +; CHECK-NEXT: sub.w r12, r2, #1 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w r3, r3, r12, lsr #3 ; CHECK-NEXT: dls lr, r3 diff --git a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll --- a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll @@ -147,10 +147,8 @@ ; CHECK-LABEL: do_dec2 ; CHECK: entry: -; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, 1 -; CHECK: [[SMIN:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 2) -; CHECK: [[SUB:%[^ ]+]] = sub i32 [[ROUND]], [[SMIN]] -; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[SUB]], 1 +; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, -1 +; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[ROUND]], 1 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1 ; CHECK: 
while.body.lr.ph: diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll @@ -573,8 +573,6 @@ ; CHECK-NEXT: [[ENTRY_COND:%.*]] = and i1 [[ENTRY_COND_0]], [[ENTRY_COND_1]] ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LEN]], i32 0) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX]], -5 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_INC:%.*]], [[BE:%.*]] ], [ -6, [[LOOP_PREHEADER]] ] @@ -583,7 +581,7 @@ ; CHECK-NEXT: br i1 true, label [[BE]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: be: ; CHECK-NEXT: call void @side_effect() -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_INC]], [[TMP0]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_INC]], [[LEN_ADD_5]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE_LOOPEXIT]] ; CHECK: leave.loopexit: ; CHECK-NEXT: br label [[LEAVE]] diff --git a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll --- a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll @@ -11,12 +11,45 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[I2]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[I2]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; 
CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add i64 [[I8_6]], 1 +; 
CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: