Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1064,6 +1064,22 @@ break; } + case Instruction::Add: { + unsigned NLZ = DemandedMask.countLeadingZeros(); + APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ); + + // If an operand adds zeros to every bit below the highest demanded bit, + // that operand doesn't change the result. Return the other side. + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) + return I->getOperand(0); + + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); + if (DemandedFromOps.isSubsetOf(LHSKnown.Zero)) + return I->getOperand(1); + + break; + } case Instruction::AShr: { // Compute the Known bits to simplify things downstream. computeKnownBits(I, Known, Depth, CxtI); Index: llvm/test/Transforms/InstCombine/add.ll =================================================================== --- llvm/test/Transforms/InstCombine/add.ll +++ llvm/test/Transforms/InstCombine/add.ll @@ -2183,7 +2183,7 @@ ; CHECK-NEXT: [[M:%.*]] = mul i8 [[X:%.*]], -32 ; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], [[Y:%.*]] ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[R:%.*]] = trunc i8 [[A]] to i5 +; CHECK-NEXT: [[R:%.*]] = trunc i8 [[Y]] to i5 ; CHECK-NEXT: ret i5 [[R]] ; %m = mul i8 %x, -32 ; 0xE0 @@ -2214,7 +2214,7 @@ ; CHECK-NEXT: [[M:%.*]] = and i8 [[X:%.*]], -64 ; CHECK-NEXT: [[A:%.*]] = add i8 [[Y]], [[M]] ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[S:%.*]] = sub i8 [[A]], [[Z:%.*]] +; CHECK-NEXT: [[S:%.*]] = sub i8 [[Y]], [[Z:%.*]] ; CHECK-NEXT: [[R:%.*]] = shl i8 [[S]], 2 ; CHECK-NEXT: ret i8 [[R]] ; @@ -2257,7 +2257,7 @@ ; CHECK-NEXT: [[XY:%.*]] = or i128 [[SHY]], [[ZX]] ; CHECK-NEXT: [[SUB:%.*]] = sub i128 [[XY]], [[ZZ]] ; CHECK-NEXT: [[ADD:%.*]] = add i128 [[SUB]], [[MW]] -; CHECK-NEXT: [[T:%.*]] = trunc i128 [[ADD]] to i64 +; CHECK-NEXT: [[T:%.*]] = trunc i128 [[SUB]] to i64 ; CHECK-NEXT: [[H:%.*]] = lshr i128 [[ADD]], 64 ; CHECK-NEXT: [[T2:%.*]] = trunc i128 [[H]] to i64 ; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i64 } poison, i64 [[T]], 0 Index: llvm/test/Transforms/InstCombine/shift.ll =================================================================== --- llvm/test/Transforms/InstCombine/shift.ll +++ llvm/test/Transforms/InstCombine/shift.ll @@ -1750,11 +1750,12 @@ define void @ossfuzz_38078(i32 %arg, i32 %arg1, i32* %ptr, i1* %ptr2, i32* %ptr3, i1* %ptr4, i32* %ptr5, i32* %ptr6, i1* %ptr7) { ; CHECK-LABEL: @ossfuzz_38078( ; CHECK-NEXT: bb: +; CHECK-NEXT: [[I2:%.*]] = add nsw i32 [[ARG:%.*]], [[ARG1:%.*]] +; CHECK-NEXT: [[B3:%.*]] = or i32 [[I2]], 2147483647 ; CHECK-NEXT: [[G1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 -1 -; CHECK-NEXT: [[I2:%.*]] = sub i32 0, [[ARG1:%.*]] -; CHECK-NEXT: [[I5:%.*]] = icmp eq i32 [[I2]], [[ARG:%.*]] +; CHECK-NEXT: [[I5:%.*]] = icmp eq i32 [[I2]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[I5]]) -; CHECK-NEXT: store volatile i32 2147483647, i32* [[G1]], align 4 +; CHECK-NEXT: store volatile i32 [[B3]], i32* [[G1]], align 4 ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: BB: ; CHECK-NEXT: unreachable Index: llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -24,7 +24,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934576 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -16 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]] ; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i64 [[N_VEC]] @@ -119,7 +119,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 31 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934560 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -32 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET]], i64 0 @@ -157,7 +157,7 @@ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP2]], 8589934584 +; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP2]], -8 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC9]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]] ; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC9]] Index: llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -25,8 +25,8 @@ ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 4294967264 -; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; AUTO_VEC-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[ZEXT]], -32 ; AUTO_VEC-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 5 @@ -35,7 +35,7 @@ ; AUTO_VEC-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 96 ; AUTO_VEC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AUTO_VEC: vector.ph.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 1152921504606846972 +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], -4 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] @@ -296,8 +296,8 @@ ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 -; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to double -; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double [[CAST_CRD]], 3.000000e+00 +; AUTO_VEC-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to double +; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double [[CAST_VTC]], 3.000000e+00 ; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], -16 ; AUTO_VEC-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 @@ -305,7 +305,7 @@ ; AUTO_VEC-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 48 ; AUTO_VEC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AUTO_VEC: vector.ph.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 2305843009213693948 +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], -4 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] @@ -556,8 +556,8 @@ ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967264 -; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float [[CAST_CRD]], 4.200000e+01 +; AUTO_VEC-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float [[CAST_VTC]], 4.200000e+01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd reassoc float [[TMP1]], 1.000000e+00 ; AUTO_VEC-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP0]], -32 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 5 @@ -566,7 +566,7 @@ ; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 32 ; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AUTO_VEC: vector.ph.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], 1152921504606846974 +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], -2 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] Index: llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -28,7 +28,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 63 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934528 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -64 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <16 x i16> poison, i16 [[OFFSET]], i64 0 @@ -84,7 +84,7 @@ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP2]], 8589934584 +; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP2]], -8 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC19]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]] ; CHECK-NEXT: [[IND_END22:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[N_VEC19]] @@ -171,7 +171,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 127 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934464 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -128 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i8> [[BROADCAST_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <32 x i8> poison, i8 [[OFFSET]], i64 0 @@ -227,7 +227,7 @@ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP2]], 8589934576 +; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP2]], -16 ; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC19]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]] ; CHECK-NEXT: [[IND_END22:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC19]] Index: llvm/test/Transforms/LoopVectorize/float-induction.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/float-induction.ll +++ llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -30,9 +30,9 @@ ; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 -; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 +; VEC4_INTERL1-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_VTC]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer @@ -88,9 +88,9 @@ ; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: -; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 -; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 +; VEC4_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_VTC]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer @@ -150,9 +150,9 @@ ; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: -; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC1_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_VTC]] ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: @@ -203,9 +203,9 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_VTC]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer @@ -289,9 +289,9 @@ ; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 -; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 +; VEC4_INTERL1-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_VTC]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer @@ -347,9 +347,9 @@ ; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: -; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 -; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 +; VEC4_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_VTC]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer @@ -409,9 +409,9 @@ ; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: -; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC1_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_VTC]] ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: @@ -464,9 +464,9 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_VTC]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer @@ -551,9 +551,9 @@ ; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 -; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 +; VEC4_INTERL1-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer @@ -602,9 +602,9 @@ ; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: -; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 -; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 +; VEC4_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer @@ -657,9 +657,9 @@ ; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: -; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC1_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: @@ -709,9 +709,9 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer @@ -795,13 +795,13 @@ ; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588 -; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -4 +; VEC4_INTERL1-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_VTC]], -5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 -; VEC4_INTERL1-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] -; VEC4_INTERL1-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] +; VEC4_INTERL1-NEXT: [[CAST_VTC2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_VTC2]] +; VEC4_INTERL1-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 @@ -841,12 +841,12 @@ ; VEC4_INTERL1: scalar.ph: ; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] ; VEC4_INTERL1: for.body: ; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; VEC4_INTERL1-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] -; VEC4_INTERL1-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; VEC4_INTERL1-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 ; VEC4_INTERL1-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] @@ -877,13 +877,13 @@ ; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: -; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584 -; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -8 +; VEC4_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_VTC]], -5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 -; VEC4_INTERL2-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] -; VEC4_INTERL2-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] +; VEC4_INTERL2-NEXT: [[CAST_VTC2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_VTC2]] +; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 @@ -938,12 +938,12 @@ ; VEC4_INTERL2: scalar.ph: ; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] ; VEC4_INTERL2: for.body: ; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; VEC4_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] -; VEC4_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; VEC4_INTERL2-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 ; VEC4_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] @@ -974,13 +974,13 @@ ; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: -; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590 -; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -2 +; VEC1_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_VTC]], -5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 -; VEC1_INTERL2-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] -; VEC1_INTERL2-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] +; VEC1_INTERL2-NEXT: [[CAST_VTC2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_VTC2]] +; VEC1_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1018,12 +1018,12 @@ ; VEC1_INTERL2: scalar.ph: ; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] ; VEC1_INTERL2: for.body: ; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; VEC1_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] -; VEC1_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; VEC1_INTERL2-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 ; VEC1_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] @@ -1054,13 +1054,13 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_VTC]], -5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_VTC2]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 @@ -1100,7 +1100,7 @@ ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] @@ -1172,9 +1172,9 @@ ; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: -; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 -; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 +; VEC4_INTERL1-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: @@ -1220,9 +1220,9 @@ ; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: -; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 -; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 +; VEC4_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: @@ -1272,9 +1272,9 @@ ; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: -; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC1_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: @@ -1324,9 +1324,9 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: -; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_VTC]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: @@ -1389,7 +1389,7 @@ ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 -; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] @@ -1440,7 +1440,7 @@ ; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VEC4_INTERL1: scalar.ph: ; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_VTC]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] ; VEC4_INTERL1: for.body: ; VEC4_INTERL1-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1467,10 +1467,10 @@ ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800 -; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: -; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ] +; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] ; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 4 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] @@ -1478,9 +1478,9 @@ ; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 4 ; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* -; VEC4_INTERL2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4 +; VEC4_INTERL2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4 ; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer -; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD3]], zeroinitializer +; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD2]], zeroinitializer ; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP6]], i64 0 ; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC4_INTERL2: pred.store.if: @@ -1489,66 +1489,66 @@ ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC4_INTERL2: pred.store.continue: ; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1 -; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; VEC4_INTERL2: pred.store.if3: ; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VEC4_INTERL2-NEXT: store float [[TMP11]], float* [[TMP13]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE5]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE4]] ; VEC4_INTERL2: pred.store.continue4: ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2 -; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; VEC4_INTERL2: pred.store.if5: ; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VEC4_INTERL2-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE7]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE6]] ; VEC4_INTERL2: pred.store.continue6: ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3 -; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; VEC4_INTERL2: pred.store.if7: ; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]] ; VEC4_INTERL2-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE8]] ; VEC4_INTERL2: pred.store.continue8: ; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0 -; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; VEC4_INTERL2: pred.store.if9: ; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]] ; VEC4_INTERL2-NEXT: store float [[TMP23]], float* [[TMP24]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE11]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE10]] ; VEC4_INTERL2: pred.store.continue10: ; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1 -; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; VEC4_INTERL2: pred.store.if11: ; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]] ; VEC4_INTERL2-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE13]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE12]] ; VEC4_INTERL2: pred.store.continue12: ; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2 -; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] ; VEC4_INTERL2: pred.store.if13: ; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] ; VEC4_INTERL2-NEXT: store float [[TMP30]], float* [[TMP32]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE15]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE14]] ; VEC4_INTERL2: pred.store.continue14: ; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3 -; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]] +; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] ; VEC4_INTERL2: pred.store.if15: ; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]] ; VEC4_INTERL2-NEXT: store float [[TMP34]], float* [[TMP36]], align 4 -; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE17]] +; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE16]] ; VEC4_INTERL2: pred.store.continue16: ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; VEC4_INTERL2-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1558,7 +1558,7 @@ ; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VEC4_INTERL2: scalar.ph: ; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_VTC]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] ; VEC4_INTERL2: for.body: ; VEC4_INTERL2-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1585,7 +1585,7 @@ ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 -; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] @@ -1616,7 +1616,7 @@ ; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VEC1_INTERL2: scalar.ph: ; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_VTC]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] ; VEC1_INTERL2: for.body: ; VEC1_INTERL2-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1643,7 +1643,7 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 -; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_VTC:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] @@ -1676,7 +1676,7 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[CAST_VTC]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[VAR1:%.*]] = load float, float* [[VAR0]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00 Index: llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -25,7 +25,7 @@ ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -48,7 +48,7 @@ ; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP17]], align 4, !alias.scope !0, !noalias !3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -76,7 +76,7 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: Index: llvm/test/Transforms/LoopVectorize/induction.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/induction.ll +++ llvm/test/Transforms/LoopVectorize/induction.ll @@ -63,7 +63,7 @@ ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32 ; IND-NEXT: [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190 ; IND-NEXT: br label [[VECTOR_BODY:%.*]] @@ -105,7 +105,7 @@ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NEXT: [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -202,7 +202,7 @@ ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: -; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -979,7 +979,7 @@ ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902 +; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 ; IND-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3 ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: @@ -1039,7 +1039,7 @@ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387900 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; UNROLL-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: @@ -1695,7 +1695,7 @@ ; IND-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; IND-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1757,7 +1757,7 @@ ; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -2582,7 +2582,7 @@ ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2630,7 +2630,7 @@ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0 @@ -2754,7 +2754,7 @@ ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: -; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 @@ -3333,7 +3333,7 @@ ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i8 [[DOTPR_I]], -1 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 510 +; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], -2 ; IND-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]] ; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i64 0 @@ -3375,7 +3375,7 @@ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -4 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 508 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], -4 ; UNROLL-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]] ; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i64 0 @@ -3469,7 +3469,7 @@ ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -8 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: -; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 504 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], -8 ; INTERLEAVE-NEXT: [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[C_PROMOTED_I]], i64 0 Index: llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1306,7 +1306,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -1396,7 +1396,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[TMP3]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] Index: llvm/test/Transforms/LoopVectorize/loop-scalars.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -70,7 +70,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -130,7 +130,7 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: Index: llvm/test/Transforms/LoopVectorize/runtime-check.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -28,7 +28,7 @@ ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 16, [[DBG9]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], [[DBG9]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, [[DBG9]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4, [[DBG9]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], [[DBG9]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [[DBG9]] Index: llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll +++ llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll @@ -183,7 +183,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], 28 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; CHECK: vector.ph.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP2]], 9223372036854775800 +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP2]], -8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_7:%.*]], [[VECTOR_BODY]] ] Index: llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -42,7 +42,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 16 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; CHECK: vector.ph.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 2305843009213693950 +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], -2 ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT10]] ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT12]]