diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2510,30 +2510,45 @@ } // Compute: - // Start + |Step| * Backedge < Start - // Start - |Step| * Backedge > Start + // 1. Start + |Step| * Backedge < Start + // 2. Start - |Step| * Backedge > Start + // + // And select either 1. or 2. depending on whether step is positive or + // negative. If Step is known to be positive or negative, only create + // either 1. or 2. Value *Add = nullptr, *Sub = nullptr; + bool NeedPosCheck = !SE.isKnownNegative(Step); + bool NeedNegCheck = !SE.isKnownPositive(Step); + if (PointerType *ARPtrTy = dyn_cast(ARTy)) { StartValue = InsertNoopCastOfTo( StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace())); Value *NegMulV = Builder.CreateNeg(MulV); - Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); - Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV); + if (NeedPosCheck) + Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); + if (NeedNegCheck) + Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV); } else { - Add = Builder.CreateAdd(StartValue, MulV); - Sub = Builder.CreateSub(StartValue, MulV); + if (NeedPosCheck) + Add = Builder.CreateAdd(StartValue, MulV); + if (NeedNegCheck) + Sub = Builder.CreateSub(StartValue, MulV); + } + + Value *EndCompareLT = nullptr; + Value *EndCompareGT = nullptr; + Value *EndCheck = nullptr; + if (NeedPosCheck) + EndCheck = EndCompareLT = Builder.CreateICmp( + Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); + if (NeedNegCheck) + EndCheck = EndCompareGT = Builder.CreateICmp( + Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); + if (NeedPosCheck && NeedNegCheck) { + // Select the answer based on the sign of Step. + EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); } - Value *EndCompareGT = Builder.CreateICmp( - Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); - - Value *EndCompareLT = Builder.CreateICmp( - Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); - - // Select the answer based on the sign of Step. - Value *EndCheck = - Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); - // If the backedge taken count type is larger than the AR type, // check that we don't drop any bits by truncating it. If we are // dropping bits, then we have overflow (unless the step is zero). diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll --- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -18,23 +18,17 @@ ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: @@ -166,23 +160,17 @@ ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) -; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -541,13 +541,10 @@ ; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]]) ; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 ; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]] ; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] ; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] -; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] -; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]] ; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; VF-TWO-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; VF-TWO-CHECK: vector.main.loop.iter.check: @@ -771,13 +768,10 @@ ; VF-FOUR-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]]) ; VF-FOUR-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 ; VF-FOUR-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]] ; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] ; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] -; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] -; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]] ; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; VF-FOUR-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; VF-FOUR-CHECK: vector.main.loop.iter.check: diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -63,11 +63,8 @@ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP8]], [[TMP0]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]] -; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -52,13 +52,10 @@ ; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]] ; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255 -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -646,11 +646,8 @@ ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] @@ -734,11 +731,8 @@ ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -3557,20 +3557,14 @@ ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 ; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -3784,20 +3778,14 @@ ; UNROLL-NO-IC: vector.scevcheck: ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]] ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: @@ -3984,20 +3972,14 @@ ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 ; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -4221,20 +4203,14 @@ ; UNROLL-NO-IC: vector.scevcheck: ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]] ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: @@ -4634,12 +4610,9 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2 @@ -4774,12 +4747,9 @@ ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 @@ -6549,9 +6519,9 @@ ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] @@ -6627,9 +6597,9 @@ ; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) ; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 -; IND-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]] +; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 +; IND-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 +; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]] ; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 ; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 ; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] @@ -6700,9 +6670,9 @@ ; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) ; UNROLL-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 -; UNROLL-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]] +; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 +; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 +; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]] ; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 ; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 ; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] @@ -6780,9 +6750,9 @@ ; UNROLL-NO-IC-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]] -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] @@ -6865,9 +6835,9 @@ ; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) ; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 -; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]] +; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 +; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]] ; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -171,13 +171,10 @@ ; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -53,9 +53,9 @@ ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] @@ -179,9 +179,9 @@ ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i8 [[TMP7]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i8 [[TMP6]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] @@ -379,9 +379,9 @@ ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add i8 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP5]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP8]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP8]], i1 [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -25,12 +25,9 @@ ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[ARR2]] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8 ; CHECK-NEXT: [[TMP6:%.*]] = add i8 1, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = sub i8 1, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255 -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP11]] ; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -21,12 +21,9 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2 ; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1 @@ -111,12 +108,9 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2 ; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1 @@ -280,12 +274,9 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2 ; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2 diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll --- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll +++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll @@ -18,10 +18,8 @@ ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1 ; LV: [[OFNegMulResult:%[^ ]*]] = sub i64 0, [[OFMulResult]] -; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFMulResult]] -; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base]], i64 [[OFNegMulResult]] +; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFNegMulResult]] ; LV-NEXT: icmp ugt i8 addrspace(13)* [[NegGEP]], [[Base]] -; LV-NEXT: icmp ult i8 addrspace(13)* [[PosGEP]], [[Base]] ; LV-NOT: inttoptr ; LV-NOT: ptrtoint top: diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll --- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll +++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll @@ -35,23 +35,17 @@ ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 ; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]] -; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] -; LV-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 ; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0 -; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] ; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]] -; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]] ; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] ; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: @@ -160,13 +154,10 @@ ; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]]) ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]] -; LV-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP1]] -; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] ; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]] ; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; LV-NEXT: [[TMP12:%.*]] = trunc i64 [[N]] to i31 ; LV-NEXT: [[TMP13:%.*]] = zext i31 [[TMP12]] to i64 @@ -177,12 +168,9 @@ ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8* ; LV-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] ; LV-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]] ; LV-NEXT: [[TMP18:%.*]] = icmp ugt i8* [[TMP17]], [[SCEVGEP5]] -; LV-NEXT: [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]] -; LV-NEXT: [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]] -; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] ; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP10]], [[TMP21]] ; LV-NEXT: br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: @@ -276,23 +264,17 @@ ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 ; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]] -; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] -; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 ; LV-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0 -; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]] ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] ; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]] -; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]] -; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]] ; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] -; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] -; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] ; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: @@ -377,13 +359,10 @@ ; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]]) ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]] -; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]] -; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] ; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]] ; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64 ; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]] @@ -392,12 +371,9 @@ ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8* ; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] ; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]] ; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]] -; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]] -; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]] -; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] ; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]] ; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: @@ -490,13 +466,10 @@ ; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]]) ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] ; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]] -; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]] -; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]] ; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]] ; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] ; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64 ; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]] @@ -505,12 +478,9 @@ ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8* ; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]] ; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]] ; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]] -; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]] -; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]] -; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] +; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] ; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]] ; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: