diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2904,6 +2904,13 @@
   if (Pred == CmpInst::ICMP_SLT && C == *C2)
     return new ICmpInst(ICmpInst::ICMP_UGT, X, ConstantInt::get(Ty, C ^ SMax));
 
+  // (X + -1) <u C --> X <=u C (if X is never null)
+  if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) {
+    const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+    if (llvm::isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
+      return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C));
+  }
+
   if (!Add->hasOneUse())
     return nullptr;
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -1216,8 +1216,7 @@
 ; CHECK-LABEL: @icmp_dec_assume_nonzero(
 ; CHECK-NEXT: [[Z:%.*]] = icmp ne i8 [[X:%.*]], 0
 ; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
-; CHECK-NEXT: [[I:%.*]] = add i8 [[X]], -1
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[I]], 7
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 8
 ; CHECK-NEXT: ret i1 [[C]]
 ;
   %z = icmp ne i8 %x, 0
@@ -1231,8 +1230,7 @@
 ; CHECK-LABEL: @icmp_dec_sub_assume_nonzero(
 ; CHECK-NEXT: [[Z:%.*]] = icmp ne i8 [[X:%.*]], 0
 ; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
-; CHECK-NEXT: [[I:%.*]] = add i8 [[X]], -1
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[I]], 11
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 12
 ; CHECK-NEXT: ret i1 [[C]]
 ;
   %z = icmp ne i8 %x, 0
@@ -1244,9 +1242,7 @@
 
 define i1 @icmp_dec_nonzero(i16 %x) {
 ; CHECK-LABEL: @icmp_dec_nonzero(
-; CHECK-NEXT: [[O:%.*]] = or i16 [[X:%.*]], 4
-; CHECK-NEXT: [[I:%.*]] = add nsw i16 [[O]], -1
-; CHECK-NEXT: [[C:%.*]] = icmp ult i16 [[I]], 7
+; CHECK-NEXT: [[C:%.*]] = icmp ult i16 [[X:%.*]], 8
 ; CHECK-NEXT: ret i1 [[C]]
 ;
   %o = or i16 %x, 4
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes='default<O3>' -mtriple aarch64 -mcpu=cortex-a55 | FileCheck %s -check-prefix=CHECK-A55
-; RUN: opt < %s -S -passes='default<O3>' -mtriple aarch64 | FileCheck %s -check-prefix=CHECK-GENERIC
+; RUN: opt < %s -S -passes="default<O3>" -mtriple aarch64 -mcpu=cortex-a55 | FileCheck %s -check-prefix=CHECK-A55
+; RUN: opt < %s -S -passes="default<O3>" -mtriple aarch64 | FileCheck %s -check-prefix=CHECK-GENERIC
 
 ; Testing that, while runtime unrolling is performed on in-order cores (such as the cortex-a55), it is not performed when -mcpu is not specified
 define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %arg_3) {
@@ -12,41 +12,40 @@
 ; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef
 ; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef
 ; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef
-; CHECK-A55-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1
 ; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3
-; CHECK-A55-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-A55-NEXT: br i1 [[TMP1]], label 
[[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]] +; CHECK-A55-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_0]], 4 +; CHECK-A55-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]] ; CHECK-A55: for.body6.lr.ph.new: ; CHECK-A55-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4 ; CHECK-A55-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK-A55: for.body6: ; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ] -; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 -; CHECK-A55-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15:%.*]] = sext i16 [[TMP3]] to i32 +; CHECK-A55-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-A55-NEXT: [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]] -; CHECK-A55-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP4]] +; CHECK-A55-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 +; CHECK-A55-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP3]] ; CHECK-A55-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP5]] to i32 -; CHECK-A55-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15_1:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-A55-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32 +; CHECK-A55-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15_1:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-A55-NEXT: [[MUL16_1:%.*]] = mul nsw i32 [[CONV15_1]], [[CONV_1]] ; CHECK-A55-NEXT: [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[ADD21]] ; CHECK-A55-NEXT: store i32 [[ADD21_1]], i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP7]] to i32 -; CHECK-A55-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP8]] to i32 +; CHECK-A55-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-A55-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP7]] to i32 ; CHECK-A55-NEXT: [[MUL16_2:%.*]] = mul nsw i32 [[CONV15_2]], [[CONV_2]] ; CHECK-A55-NEXT: [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[ADD21_1]] ; CHECK-A55-NEXT: store i32 [[ADD21_2]], i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP9]] to i32 -; CHECK-A55-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-A55-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP8]] to i32 +; CHECK-A55-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-A55-NEXT: [[MUL16_3:%.*]] = 
mul nsw i32 [[CONV15_3]], [[CONV_3]] ; CHECK-A55-NEXT: [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[ADD21_2]] ; CHECK-A55-NEXT: store i32 [[ADD21_3]], i32* [[ARRAYIDX20]], align 4 @@ -57,35 +56,35 @@ ; CHECK-A55-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0 ; CHECK-A55-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]] ; CHECK-A55: for.body6.epil: -; CHECK-A55-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP11]] to i32 -; CHECK-A55-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-A55-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-A55-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP11]] to i32 ; CHECK-A55-NEXT: [[MUL16_EPIL:%.*]] = mul nsw i32 [[CONV15_EPIL]], [[CONV_EPIL]] -; CHECK-A55-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP13]] +; CHECK-A55-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 +; CHECK-A55-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP12]] ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL]], i32* [[ARRAYIDX20]], align 4 ; CHECK-A55-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 1 ; CHECK-A55-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_1:%.*]] ; CHECK-A55: for.body6.epil.1: -; CHECK-A55-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP14]] to i32 -; CHECK-A55-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP15]] to i32 +; CHECK-A55-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-A55-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP14]] to i32 ; CHECK-A55-NEXT: [[MUL16_EPIL_1:%.*]] = mul nsw i32 [[CONV15_EPIL_1]], [[CONV_EPIL_1]] -; CHECK-A55-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP16]] +; CHECK-A55-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 +; CHECK-A55-NEXT: [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP15]] ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_1]], i32* [[ARRAYIDX20]], align 4 ; CHECK-A55-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 2 ; CHECK-A55-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_2:%.*]] ; CHECK-A55: for.body6.epil.2: -; CHECK-A55-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-A55-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP17]] to i32 -; CHECK-A55-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-A55-NEXT: [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP18]] to i32 +; CHECK-A55-NEXT: [[TMP16:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-A55-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP16]] to i32 +; CHECK-A55-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-A55-NEXT: [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP17]] to i32 ; CHECK-A55-NEXT: [[MUL16_EPIL_2:%.*]] = mul nsw i32 [[CONV15_EPIL_2]], [[CONV_EPIL_2]] -; CHECK-A55-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[ARRAYIDX20]], align 4 -; CHECK-A55-NEXT: [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP19]] +; CHECK-A55-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 +; CHECK-A55-NEXT: [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP18]] ; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_2]], i32* [[ARRAYIDX20]], align 4 ; CHECK-A55-NEXT: br label [[FOR_END]] ; CHECK-A55: for.end: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll --- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll @@ -9,10 +9,9 @@ ; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]] ; CHECK: for.body.lr.ph.new: ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -28,30 +27,30 @@ ; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]] ; CHECK: for.body.epil: ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_UNR]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 ; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV_UNR]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4 -; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4 +; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]] ; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]] ; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1 ; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]] ; CHECK: for.body.epil.1: ; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1 ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4 ; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4 -; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4 +; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]] ; CHECK-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2 ; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2:%.*]] ; CHECK: for.body.epil.2: ; CHECK-NEXT: 
[[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2 ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4 ; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4 -; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4 +; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add nsw i32 [[MUL_EPIL_2]], [[ADD_EPIL_1]] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ; CHECK: for.cond.cleanup.loopexit.epilog-lcssa: @@ -68,31 +67,31 @@ ; CHECK-NEXT: [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ] ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]] ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4 -; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]] ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4 -; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX_3]], 
align 4 ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4 -; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -16 @@ -113,10 +113,10 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 31 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[BLOCKSIZE]], 32 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -32 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1359,7 +1359,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 6 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 7 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP2]], -4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -198,10 +198,9 @@ ; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; AUTO_VEC: for.body.preheader: ; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 -; AUTO_VEC-NEXT: [[TMP0:%.*]] = add nsw i64 [[ZEXT]], -1 ; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[ZEXT]], 7 -; AUTO_VEC-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; AUTO_VEC-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label 
[[FOR_BODY_PREHEADER_NEW:%.*]] +; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 8 +; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; AUTO_VEC: for.body.preheader.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 4294967288 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -22,10 +22,10 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 63 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[BLOCKSIZE]], 64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -64 @@ -165,10 +165,10 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 127 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[BLOCKSIZE]], 128 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -128 diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -27,7 +27,7 @@ ; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 @@ -85,7 +85,7 @@ ; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 @@ -286,7 +286,7 @@ ; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; 
VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 @@ -344,7 +344,7 @@ ; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 @@ -548,7 +548,7 @@ ; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 @@ -599,7 +599,7 @@ ; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 @@ -792,7 +792,7 @@ ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -4 @@ -874,7 +874,7 @@ ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -8 @@ -1169,7 +1169,7 @@ ; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; 
VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 @@ -1217,7 +1217,7 @@ ; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -4576,8 +4576,7 @@ ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; IND: vector.scevcheck: -; IND-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 -; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648 +; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649 ; IND-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2 @@ -4586,14 +4585,14 @@ ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 -; IND-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32 -; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] -; IND-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* -; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4 +; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32 +; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] +; IND-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>* +; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; IND-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4604,8 +4603,8 @@ ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; IND-NEXT: [[SEXT1:%.*]] = shl i64 [[INDVARS_IV]], 32 -; IND-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT1]], 32 -; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; IND-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT1]], 32 +; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] ; IND-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] @@ -4618,8 +4617,7 @@ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; UNROLL: vector.scevcheck: -; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 -; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648 +; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649 ; UNROLL-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4 @@ -4629,17 +4627,17 @@ ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 -; UNROLL-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32 -; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] -; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* -; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4 -; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2 -; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* -; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP5]], align 4 +; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32 +; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] +; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>* +; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4 +; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 2 +; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* +; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP4]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4650,8 +4648,8 @@ ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; UNROLL-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32 -; UNROLL-NEXT: [[TMP7:%.*]] = ashr exact i64 [[SEXT2]], 32 -; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] +; 
UNROLL-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT2]], 32 +; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] ; UNROLL-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] @@ -4715,8 +4713,7 @@ ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; INTERLEAVE: vector.scevcheck: -; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 -; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648 +; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649 ; INTERLEAVE-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8 @@ -4726,17 +4723,17 @@ ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 -; INTERLEAVE-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32 -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] -; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP3]], align 4 -; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* -; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP5]], align 4 +; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] +; INTERLEAVE-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* +; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP2]], align 4 +; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4 +; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP4]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4747,8 +4744,8 @@ ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; INTERLEAVE-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32 -; INTERLEAVE-NEXT: [[TMP7:%.*]] = ashr exact i64 [[SEXT2]], 32 -; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] +; INTERLEAVE-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT2]], 32 +; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] ; 
INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1, !dbg [[DBG9:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG9]] ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1, !dbg [[DBG9]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3, !dbg [[DBG9]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4, !dbg [[DBG9]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], !dbg [[DBG9]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[A1]], [[B2]], !dbg [[DBG9]]
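Note: as a quick sanity check of the new fold outside the autogenerated tests, a minimal reproducer in the spirit of @icmp_dec_assume_nonzero above can be run through "opt -passes=instcombine -S"; the function name @dec_ult_example and the constant 7 are purely illustrative and not part of this patch:

  declare void @llvm.assume(i1)

  define i1 @dec_ult_example(i8 %x) {
    %nz = icmp ne i8 %x, 0
    call void @llvm.assume(i1 %nz)
    %dec = add i8 %x, -1
    %cmp = icmp ult i8 %dec, 7
    ret i1 %cmp
  }

Because the assume makes %x known non-zero, instcombine now folds the decrement-and-compare into a single "icmp ult i8 %x, 8" (X <=u 7, canonicalized to X <u 8). The same pattern is what disappears from the minimum-trip-count guards in the LoopUnroll and LoopVectorize tests above, where the trip count is already known to be non-zero in the guarded preheaders.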