diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10632,9 +10632,6 @@ DT); ++LoopsVectorized; - simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); - formLCSSARecursively(*L, *DT, LI, SE); - // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. EPI.MainLoopVF = EPI.EpilogueVF; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -60,43 +60,40 @@ ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP30:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP30:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0 ; CHECK-NEXT: [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD10]]) -; CHECK-NEXT: [[TMP30]] = and i64 [[TMP29]], [[VEC_PHI9]] -; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]] +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD9]]) +; CHECK-NEXT: [[TMP30]] = and i64 [[TMP29]], [[VEC_PHI8]] +; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]] -; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] ; CHECK-NEXT: [[L3:%.*]] = load i64, i64* [[L2]], align 4 ; CHECK-NEXT: [[AND]] = and i64 [[RDX]], [[L3]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[AND_LCSSA4:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[AND_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[AND_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -59,44 +59,41 @@ ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <2 x i64> [ [[TMP24]], [[VEC_EPILOG_PH]] ], [ [[TMP29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x i64> [ [[TMP24]], [[VEC_EPILOG_PH]] ], [ [[TMP29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0 ; CHECK-NEXT: [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29]] = add <2 x i64> [[WIDE_LOAD10]], [[VEC_PHI9]] -; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]] +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29]] = add <2 x i64> [[WIDE_LOAD9]], [[VEC_PHI8]] +; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP29]]) -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]] -; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add i64 [[TMP32]], [[SUM]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA4:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[ADD_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -56,42 +56,39 @@ ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] +; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to <2 x float>* -; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4 -; CHECK-NEXT: [[TMP27]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI8]], <2 x float> [[WIDE_LOAD9]]) -; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI7]], <2 x float> [[WIDE_LOAD8]]) +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = fadd float [[TMP29]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA3:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA3]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[ADD_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -74,20 +74,20 @@ ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i32 0 ; CHECK-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to * ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i32 0), poison, zeroinitializer), * [[TMP30]], align 1 ; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 8 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP32]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[OFFSET_IDX]], [[TMP32]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -97,9 +97,7 @@ ; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: exit.loopexit: -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -151,18 +149,18 @@ ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-VF8-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP21]] ; CHECK-VF8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i32 0 ; CHECK-VF8-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to <8 x i8>* ; CHECK-VF8-NEXT: store <8 x i8> , <8 x i8>* [[TMP24]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 +; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-VF8-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 -; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: ; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] @@ -172,9 +170,7 @@ ; CHECK-VF8-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-VF8-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-VF8: exit.loopexit: -; CHECK-VF8-NEXT: br label [[EXIT]] +; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF8: exit: ; CHECK-VF8-NEXT: ret void ; @@ -257,18 +253,18 @@ ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP22]], i32 0 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <8 x i64>* ; CHECK-NEXT: store <8 x i64> , <8 x i64>* [[TMP24]], align 1 -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 -; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -278,9 +274,7 @@ ; CHECK-NEXT: store i64 1, i64* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: exit.loopexit: -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -332,18 +326,18 @@ ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: -; CHECK-VF8-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[INDEX2]], 0 +; CHECK-VF8-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP21]] ; CHECK-VF8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP22]], i32 0 ; CHECK-VF8-NEXT: [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <8 x i64>* ; CHECK-VF8-NEXT: store <8 x i64> , <8 x i64>* [[TMP24]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8 +; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-VF8-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1024, 1024 -; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-VF8-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: ; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] @@ -353,9 +347,7 @@ ; CHECK-VF8-NEXT: store i64 1, i64* [[ARRAYIDX]], align 1 ; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-VF8-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK-VF8: exit.loopexit: -; CHECK-VF8-NEXT: br label [[EXIT]] +; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-VF8: exit: ; CHECK-VF8-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -227,7 +227,7 @@ ; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]] -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -242,9 +242,7 @@ ; VF-TWO-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; VF-TWO-CHECK: for.end.loopexit.loopexit: -; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit: ; VF-TWO-CHECK-NEXT: br label [[FOR_END]] ; VF-TWO-CHECK: for.end: @@ -471,7 +469,7 @@ ; VF-FOUR-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; VF-FOUR-CHECK: vec.epilog.middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]] -; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-FOUR-CHECK: vec.epilog.scalar.ph: ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -486,9 +484,7 @@ ; VF-FOUR-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; VF-FOUR-CHECK: for.end.loopexit.loopexit: -; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-FOUR-CHECK: for.end.loopexit: ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] ; VF-FOUR-CHECK: for.end: @@ -725,7 +721,7 @@ ; VF-TWO-CHECK-NEXT: br i1 [[TMP126]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; VF-TWO-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -744,9 +740,7 @@ ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:![0-9]+]] -; VF-TWO-CHECK: for.end.loopexit.loopexit: -; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOPID_MS_CM:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit: ; VF-TWO-CHECK-NEXT: br label [[FOR_END]] ; VF-TWO-CHECK: for.end: @@ -952,7 +946,7 @@ ; VF-FOUR-CHECK-NEXT: br i1 [[TMP126]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOPID_EV_CM:![0-9]+]] ; VF-FOUR-CHECK: vec.epilog.middle.block: ; VF-FOUR-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] -; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-FOUR-CHECK: vec.epilog.scalar.ph: ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -971,9 +965,7 @@ ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; VF-FOUR-CHECK: for.end.loopexit.loopexit: -; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; VF-FOUR-CHECK: for.end.loopexit: ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] ; VF-FOUR-CHECK: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -50,29 +50,29 @@ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX6]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX6]], 9223372036854775800 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT15]], <8 x i32*> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[OFFSET_IDX]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC12]] +; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT18]], <8 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT21]] = add nuw i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT21]], [[N_VEC11]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef) -; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> -; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC12]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i64 7 -; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT16]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER19:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT16]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef) +; CHECK-NEXT: [[PREDPHI20:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER19]], <8 x i32> +; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC11]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI20]], i64 7 +; CHECK-NEXT: br i1 [[CMP_N12]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -87,12 +87,9 @@ ; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[ALOAD]], [[COND_LOAD]] ], [ 1, [[FOR_BODY]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[A_LCSSA_LCSSA10:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA10]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[A_LCSSA_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -85,7 +85,7 @@ ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP17]]) ; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC17]] -; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i32 [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -99,12 +99,9 @@ ; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[T3_LCSSA:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[T3_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[T4]] ; entry: @@ -199,7 +196,7 @@ ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC13]] -; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -216,9 +213,7 @@ ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP17:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -332,7 +327,7 @@ ; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N24:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC23]] -; CHECK-NEXT: br i1 [[CMP_N24]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N24]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -351,9 +346,7 @@ ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP28:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -51,7 +51,7 @@ ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 17, 16 -; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP_MEMCPY_EXPANSION:%.*]] @@ -63,9 +63,7 @@ ; CHECK-NEXT: store i8 [[VAL]], i8* [[STADDR]], align 64 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 17 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP_MEMCPY_EXPANSION]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: exit.loopexit: -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP_MEMCPY_EXPANSION]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -297,7 +297,7 @@ ; AVX512-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 -; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] @@ -317,9 +317,7 @@ ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; AVX512: for.end.loopexit: -; AVX512-NEXT: br label [[FOR_END]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; @@ -632,7 +630,7 @@ ; AVX512-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 -; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] @@ -652,9 +650,7 @@ ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; AVX512: for.end.loopexit: -; AVX512-NEXT: br label [[FOR_END]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; @@ -988,7 +984,7 @@ ; AVX512-NEXT: br i1 [[TMP68]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 -; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] @@ -1009,9 +1005,7 @@ ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; AVX512: for.end.loopexit: -; AVX512-NEXT: br label [[FOR_END]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; AVX512: for.end: ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -39,44 +39,41 @@ ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[TMP7]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP7]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <4 x i64>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, <4 x i64>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12]] = add <4 x i64> [[WIDE_LOAD8]], [[VEC_PHI7]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, <4 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12]] = add <4 x i64> [[WIDE_LOAD7]], [[VEC_PHI6]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP12]]) -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add i64 [[TMP15]], [[SUM]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA2:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[ADD_LCSSA]] ; entry: @@ -135,47 +132,44 @@ ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI7]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[TMP13]] = select <4 x i1> [[TMP12]], <4 x float> [[VEC_PHI7]], <4 x float> [[WIDE_LOAD8]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI6]], [[WIDE_LOAD7]] +; CHECK-NEXT: [[TMP13]] = select <4 x i1> [[TMP12]], <4 x float> [[VEC_PHI6]], <4 x float> [[WIDE_LOAD7]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP13]]) -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi float [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX9]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[C0:%.*]] = fcmp fast ogt float [[RESULT_08]], [[L0]] ; CHECK-NEXT: [[V0]] = select fast i1 [[C0]], float [[RESULT_08]], float [[L0]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[V0_LCSSA2:%.*]] = phi float [ [[V0]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[V0_LCSSA2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi float [ [[V0]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[V0_LCSSA]] ; entry: @@ -237,18 +231,18 @@ ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ [[TMP13]], [[VEC_EPILOG_PH]] ], [ [[TMP23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ [[TMP13]], [[VEC_EPILOG_PH]] ], [ [[TMP23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i32> [[VEC_PHI4]], +; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i32> [[VEC_PHI3]], ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16* [[TMP16]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16* [[TMP17]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP18]], align 2 -; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[WIDE_LOAD5]] to <4 x i32> +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP18]], align 2 +; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP15]], [[TMP19]] -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 256 +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256 ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP20]] to <4 x i16> ; CHECK-NEXT: [[TMP23]] = zext <4 x i16> [[TMP22]] to <4 x i32> ; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -256,15 +250,15 @@ ; CHECK-NEXT: [[TMP24:%.*]] = trunc <4 x i32> [[TMP23]] to <4 x i16> ; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP24]]) ; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP25]] to i32 -; CHECK-NEXT: [[CMP_N2:%.*]] = icmp eq i32 256, 256 -; CHECK-NEXT: br i1 [[CMP_N2]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i32 256, 256 +; CHECK-NEXT: br i1 [[CMP_N1]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX6]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[GEP]], align 2 @@ -272,12 +266,9 @@ ; CHECK-NEXT: [[XOR]] = or i32 [[SUM_02]], [[EXT]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[XOR_LCSSA1:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[XOR_LCSSA1]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[XOR_LCSSA]] to i16 ; CHECK-NEXT: ret i16 [[RET]] ; @@ -343,53 +334,49 @@ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.500000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX3:%.*]] = phi float [ 1.000000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF6:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC7:%.*]] = sub i64 [[N]], [[N_MOD_VF6]] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> , float [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x float> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <4 x float> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x float> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15]] = fadd fast <4 x float> [[VEC_PHI11]], [[WIDE_LOAD12]] -; CHECK-NEXT: [[TMP16]] = fmul fast <4 x float> [[VEC_PHI10]], [[WIDE_LOAD12]] -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC7]] +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15]] = fadd fast <4 x float> [[VEC_PHI9]], [[WIDE_LOAD10]] +; CHECK-NEXT: [[TMP16]] = fmul fast <4 x float> [[VEC_PHI8]], [[WIDE_LOAD10]] +; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP15]]) ; CHECK-NEXT: [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP16]]) -; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC7]] -; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi float [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi float [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi float [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi float [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[PROD:%.*]] = phi float [ [[BC_MERGE_RDX14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PROD:%.*]] = phi float [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX13]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = fadd fast float [[SUM]], [[TMP20]] ; CHECK-NEXT: [[MUL]] = fmul fast float [[PROD]], [[TMP20]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA5:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[MUL_LCSSA4:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA5]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[MUL_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[MUL_LCSSA]], [[ADD_LCSSA]] ; CHECK-NEXT: ret float [[DIV]] ; @@ -456,44 +443,41 @@ ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START_SUM]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13]] = sub <4 x i32> [[VEC_PHI7]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13]] = sub <4 x i32> [[VEC_PHI6]], [[WIDE_LOAD7]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) -; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_COND_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i32 [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SUB:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SUB:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[SUB]] = sub nsw i32 [[SUM]], [[LOAD]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] -; CHECK: for.cond.loopexit: -; CHECK-NEXT: [[SUB_LCSSA2:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_COND]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.cond: -; CHECK-NEXT: [[SUB_LCSSA]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[SUB_LCSSA2]], [[FOR_COND_LOOPEXIT]] ] +; CHECK-NEXT: [[SUB_LCSSA]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 ; CHECK-NEXT: [[OUTER_EXITCOND_NOT:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[OUTER_EXITCOND_NOT]], label [[FOR_END:%.*]], label [[ITER_CHECK]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll @@ -51,30 +51,30 @@ ; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; VF-TWO-CHECK: vec.epilog.ph: ; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; VF-TWO-CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 -; VF-TWO-CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF4]] +; VF-TWO-CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 +; VF-TWO-CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF3]] ; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; VF-TWO-CHECK: vec.epilog.vector.body: -; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]] ; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <2 x i32>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP13]], align 4 +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP13]], align 4 ; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP10]] ; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* -; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x i32>, <2 x i32>* [[TMP16]], align 4 -; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add nsw <2 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD9]] -; VF-TWO-CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[OFFSET_IDX]], 2 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] +; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP16]], align 4 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD8]] +; VF-TWO-CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[OFFSET_IDX]], 2 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: -; VF-TWO-CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC5]] +; VF-TWO-CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 -; VF-TWO-CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; VF-TWO-CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-TWO-CHECK: for.body: ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -85,12 +85,9 @@ ; VF-TWO-CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; VF-TWO-CHECK: for.end.loopexit.loopexit: -; VF-TWO-CHECK-NEXT: [[ADD_LCSSA3:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit: -; VF-TWO-CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA3]], [[FOR_END_LOOPEXIT_LOOPEXIT]] ] +; VF-TWO-CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_END]] ; VF-TWO-CHECK: for.end: ; VF-TWO-CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -80,7 +80,7 @@ ; CHECK-NEXT: br i1 [[TMP23]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -95,9 +95,7 @@ ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.end.loopexit.loopexit: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -244,7 +242,7 @@ ; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] @@ -263,9 +261,7 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end.loopexit.loopexit: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -377,7 +373,7 @@ ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -387,9 +383,7 @@ ; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: for.end.loopexit.loopexit: -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: @@ -440,7 +434,7 @@ ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]] @@ -450,9 +444,7 @@ ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit.loopexit: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: for.end: