diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2949,9 +2949,16 @@ // Split the single block loop into the two loop structure described above. LoopScalarBody = OrigLoop->getHeader(); - LoopVectorPreHeader = OrigLoop->getLoopPreheader(); LoopExitBlock = OrigLoop->getExitBlock(); assert(LoopExitBlock && "Must have an exit block"); + + BasicBlock *OrigLoopPH = OrigLoop->getLoopPreheader(); + if (OrigLoopPH->size() > 1) + LoopVectorPreHeader = SplitBlock(OrigLoopPH, OrigLoopPH->getTerminator(), + DT, LI, nullptr, ""); + else + LoopVectorPreHeader = OrigLoopPH; + assert(LoopVectorPreHeader && "Invalid loop structure"); LoopVectorPreHeader->setName("vector.ph"); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -22,9 +22,11 @@ ; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[G_1_LCSSA:%.*]], [[FOR_COND_CLEANUP]] ] ; CHECK-NEXT: [[CMP12:%.*]] = icmp ult i32 [[G_0]], 4 ; CHECK-NEXT: [[CONV:%.*]] = and i32 [[F_0]], 65535 -; CHECK-NEXT: br i1 [[CMP12]], label [[TC_CHECK:%.*]], label [[FOR_COND_CLEANUP]] -; CHECK: tc.check: +; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP]] +; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[G_0]] to i64 +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 4, [[TMP0]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll @@ -19,8 +19,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T:%.*]] = load i32, i32* @v, align 8 ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* @a, align 4 -; CHECK-NEXT: br label [[TC_CHECK:%.*]] -; CHECK: tc.check: +; CHECK-NEXT: br label [[OUTERLOOP:%.*]] +; CHECK: outerloop: ; CHECK-NEXT: [[T2:%.*]] = phi i32 [ [[V17:%.*]], [[OUTEREND:%.*]] ], [ [[T1]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[J_0136:%.*]] = phi i32 [ [[INC144:%.*]], [[OUTEREND]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[SCORE_1135:%.*]] = phi i32 [ [[CALL142:%.*]], [[OUTEREND]] ], [ -939524096, [[ENTRY]] ] @@ -34,6 +34,8 @@ ; CHECK-NEXT: [[ARRAYIDX113:%.*]] = getelementptr inbounds float, float* [[T7]], i32 [[T2]] ; CHECK-NEXT: [[T8:%.*]] = load float, float* [[ARRAYIDX113]], align 4 ; CHECK-NEXT: [[CONV114:%.*]] = fpext float [[T8]] to double +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[T]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -105,7 +107,7 @@ ; CHECK-NEXT: [[ARRAYIDX102:%.*]] = getelementptr inbounds i32, i32* @a, i32 [[INC144]] ; CHECK-NEXT: [[V17]] = load i32, i32* [[ARRAYIDX102]], align 4 ; CHECK-NEXT: [[CMP103:%.*]] = icmp sgt i32 [[V17]], -1 -; CHECK-NEXT: br i1 [[CMP103]], label [[TC_CHECK]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP103]], label [[OUTERLOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i32 [[CALL142]] ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -29,7 +29,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[K:%.*]] to i64 -; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV33:%.*]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 @@ -38,7 +38,7 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[TC_CHECK]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop !2 ; CHECK: for.body3.us: ; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ [[BC_RESUME_VAL:%.*]], [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 @@ -52,13 +52,15 @@ ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] ; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop !3 -; CHECK: tc.check: +; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], [[INDVARS_IV33]] ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 ; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP9]], [[K]] ; CHECK-NEXT: [[ARRAYIDX7_US]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV33]] +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -6,15 +6,17 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-LABEL: @inv_load_conditional( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* +; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] @@ -46,7 +48,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -45,15 +45,17 @@ ; if (b[i] == k) a = ntrunc define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, i32 %k) { ; CHECK-LABEL: @inv_val_store_to_inv_address_conditional( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* +; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] @@ -87,7 +89,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -133,16 +135,18 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, i32* %c, i32 %k) { ; CHECK-LABEL: @variant_val_store_to_inv_address_conditional( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* +; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* +; CHECK-NEXT: [[C5:%.*]] = bitcast i32* [[C:%.*]] to i8* ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[C5:%.*]] = bitcast i32* [[C:%.*]] to i8* -; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] @@ -188,7 +192,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -16,10 +16,12 @@ ;; following tests are written more generically. define i32 @test_explicit_pred(i64 %len) { ; CHECK-LABEL: @test_explicit_pred( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[LEN:%.*]], i32 0 @@ -105,7 +107,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -155,10 +157,12 @@ ;; Similiar to the above, but without an analyzeable condition. define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) { ; CHECK-LABEL: @test_explicit_pred_generic( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -281,7 +285,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -336,10 +340,12 @@ ; TODO: currently shows predication which can be removed define i32 @test_invariant_address(i64 %len, i1* %test_base) { ; CHECK-LABEL: @test_invariant_address( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -478,7 +484,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -528,10 +534,12 @@ ; Overlapping loads - Fails alignment checking, not dereferenceability define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) { ; CHECK-LABEL: @test_step_narrower_than_access( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -814,7 +822,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP185]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -869,12 +877,14 @@ define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) { ; CHECK-LABEL: @test_max_trip_count( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) ; CHECK-NEXT: [[MIN_CMP:%.*]] = icmp ult i64 4096, [[N:%.*]] ; CHECK-NEXT: [[MIN_N:%.*]] = select i1 [[MIN_CMP]], i64 4096, i64 [[N]] +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[MIN_N]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -1001,7 +1011,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -1057,10 +1067,12 @@ define i32 @test_non_zero_start(i64 %len, i1* %test_base) { ; CHECK-LABEL: @test_non_zero_start( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1184,7 +1196,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 3072, 3072 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 1024, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 1024, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -1290,10 +1302,12 @@ ;; TODO: handle non-unit strides define i32 @test_non_unit_stride(i64 %len, i1* %test_base) { ; CHECK-LABEL: @test_non_unit_stride( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1545,7 +1559,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 2048, 2048 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -1596,10 +1610,12 @@ define i32 @neg_off_by_many(i64 %len, i1* %test_base) { ; CHECK-LABEL: @neg_off_by_many( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [1024 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [1024 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1722,7 +1738,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -1773,10 +1789,12 @@ define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) { ; CHECK-LABEL: @neg_off_by_one_iteration( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4095 x i32] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [4095 x i32]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1899,7 +1917,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -1950,10 +1968,12 @@ define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) { ; CHECK-LABEL: @neg_off_by_one_byte( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16383 x i8] ; CHECK-NEXT: [[BASE:%.*]] = bitcast [16383 x i8]* [[ALLOCA]] to i32* ; CHECK-NEXT: call void @init(i32* [[BASE]]) +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2076,7 +2096,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[TC_CHECK]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1943,9 +1943,11 @@ ; AVX1-LABEL: @foo7( ; AVX1-NEXT: entry: ; AVX1-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 -; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[TC_CHECK:%.*]] -; AVX1: tc.check: +; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; AVX1: for.body.preheader: ; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 +; AVX1-NEXT: br label [[TC_CHECK:%.*]] +; AVX1: tc.check: ; AVX1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 ; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AVX1: vector.ph: @@ -2073,9 +2075,11 @@ ; AVX2-LABEL: @foo7( ; AVX2-NEXT: entry: ; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 -; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[TC_CHECK:%.*]] -; AVX2: tc.check: +; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; AVX2: for.body.preheader: ; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 +; AVX2-NEXT: br label [[TC_CHECK:%.*]] +; AVX2: tc.check: ; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 ; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AVX2: vector.ph: @@ -2203,9 +2207,11 @@ ; AVX512-LABEL: @foo7( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 -; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[TC_CHECK:%.*]] -; AVX512: tc.check: +; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; AVX512: for.body.preheader: ; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 +; AVX512-NEXT: br label [[TC_CHECK:%.*]] +; AVX512: tc.check: ; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AVX512: vector.ph: @@ -2378,9 +2384,11 @@ ; AVX1-LABEL: @foo8( ; AVX1-NEXT: entry: ; AVX1-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 -; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[TC_CHECK:%.*]] -; AVX1: tc.check: +; AVX1-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; AVX1: for.body.preheader: ; AVX1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 +; AVX1-NEXT: br label [[TC_CHECK:%.*]] +; AVX1: tc.check: ; AVX1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 ; AVX1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AVX1: vector.ph: @@ -2508,9 +2516,11 @@ ; AVX2-LABEL: @foo8( ; AVX2-NEXT: entry: ; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 -; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[TC_CHECK:%.*]] -; AVX2: tc.check: +; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; AVX2: for.body.preheader: ; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 +; AVX2-NEXT: br label [[TC_CHECK:%.*]] +; AVX2: tc.check: ; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 ; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AVX2: vector.ph: @@ -2638,9 +2648,11 @@ ; AVX512-LABEL: @foo8( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 -; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[TC_CHECK:%.*]] -; AVX512: tc.check: +; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; AVX512: for.body.preheader: ; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 +; AVX512-NEXT: br label [[TC_CHECK:%.*]] +; AVX512: tc.check: ; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; AVX512: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -8,12 +8,14 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrspace(1)* align 8 dereferenceable_or_null(8), i64) #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[TC_CHECK:%.*]] -; CHECK: tc.check: +; CHECK-NEXT: br label [[PREHEADER:%.*]] +; CHECK: preheader: ; CHECK-NEXT: [[DOT10:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP0:%.*]], i64 16 ; CHECK-NEXT: [[DOT11:%.*]] = bitcast i8 addrspace(1)* [[DOT10]] to i8 addrspace(1)* addrspace(1)* ; CHECK-NEXT: [[DOT12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP1:%.*]], i64 16 ; CHECK-NEXT: [[DOT13:%.*]] = bitcast i8 addrspace(1)* [[DOT12]] to i8 addrspace(1)* addrspace(1)* +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2:%.*]], 1 ; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -32,10 +32,12 @@ ; CHECK-NEXT: [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ] ; CHECK-NEXT: [[CONV52:%.*]] = and i32 [[STOREMERGE_IN9]], 255 ; CHECK-NEXT: [[CMP63:%.*]] = icmp ult i32 [[TMP2]], [[CONV52]] -; CHECK-NEXT: br i1 [[CMP63]], label [[TC_CHECK:%.*]], label [[FOR_INC9]] -; CHECK: tc.check: +; CHECK-NEXT: br i1 [[CMP63]], label [[FOR_BODY8_LR_PH:%.*]], label [[FOR_INC9]] +; CHECK: for.body8.lr.ph: ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8 ; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[CONV3]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -90,7 +90,7 @@ ; And-ed with the gaps mask. ;ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize( -;ENABLED_MASKED_STRIDED-NEXT: tc.check: +;ENABLED_MASKED_STRIDED-NEXT: entry: ;ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 ;ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 ;ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer @@ -197,7 +197,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] -; ENABLED_MASKED_STRIDED: vector.ph: +; ENABLED_MASKED_STRIDED: for.body.lr.ph: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 @@ -295,7 +295,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: entry: ; ENABLED_MASKED_STRIDED-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] -; ENABLED_MASKED_STRIDED: vector.ph: +; ENABLED_MASKED_STRIDED: for.body.lr.ph: ; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 ; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 ; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -169,7 +169,7 @@ ;} ; VEC4_INTERL1-LABEL: @fp_iv_loop3( -; VEC4_INTERL1: tc.check: +; VEC4_INTERL1: for.body.lr.ph: ; VEC4_INTERL1: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll --- a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll +++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll @@ -46,8 +46,10 @@ define float @minloopattr(float* nocapture readonly %arg) #0 { ; CHECK-LABEL: @minloopattr( -; CHECK-NEXT: tc.check: +; CHECK-NEXT: top: ; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]] +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[T]], i32 0 @@ -82,7 +84,7 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 65536, 65536 ; CHECK-NEXT: br i1 [[CMP_N]], label [[OUT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65537, [[MIDDLE_BLOCK]] ], [ 1, [[TC_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65537, [[MIDDLE_BLOCK]] ], [ 1, [[TC_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[T]], [[TC_CHECK]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -12,7 +12,7 @@ ;} ; CHECK-LABEL: @induction_with_global( -; CHECK: tc.check: +; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @int_inc, align 4 ; CHECK: vector.ph: ; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -529,7 +529,7 @@ ; is defined before the branch to the scalar preheader. ; CHECK-LABEL: testoverflowcheck -; CHECK: tc.check: +; CHECK: entry: ; CHECK: %[[LOAD:.*]] = load i8 ; CHECK: br diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -164,15 +164,17 @@ ; value. ; scalar store the value extracted from the last element of the vector value. ; CHECK-LABEL: inv_val_store_to_inv_address_conditional_diff_values_ic -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* +; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] @@ -269,16 +271,18 @@ ; Since the condition and the phi is loop invariant, they are LICM'ed after ; vectorization. ; CHECK-LABEL: inv_val_store_to_inv_address_conditional_inv -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* +; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]] +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* -; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]] @@ -365,14 +369,16 @@ ; last element from the variant vector and scalar stores it into the uniform ; address. ; CHECK-LABEL: variant_val_store_to_inv_address -; CHECK-NEXT: tc.check: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* +; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8* +; CHECK-NEXT: br label [[TC_CHECK:%.*]] +; CHECK: tc.check: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8* -; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A1]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -141,7 +141,7 @@ ; CHECK: middle.block ; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]] ; CHECK: %ind.escape = add i32 %[[T15]], -; CHECK: br i1 %[[CMP]], label %tc.check, label %scalar.ph +; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph define void @PR30742() { BB0: br label %BB1 diff --git a/llvm/test/Transforms/LoopVectorize/pr30806.ll b/llvm/test/Transforms/LoopVectorize/pr30806.ll --- a/llvm/test/Transforms/LoopVectorize/pr30806.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30806.ll @@ -40,7 +40,7 @@ ; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and ; whatever label comes next. loop2.preheader: -; CHECK-LABEL: tc.check: +; CHECK-LABEL: loop2.preheader: ; CHECK: udiv ; CHECK-LABEL: : %lim = udiv i32 %numer, %denom