diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3219,6 +3219,14 @@
   Optional<MDNode *> VectorizedLoopID =
       makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
                                       LLVMLoopVectorizeFollowupVectorized});
+
+  // If VF*UF is 2 and the vector loop is not skipped, the remainder runs once.
+  if (VF * UF == 2 && !areSafetyChecksAdded() &&
+      !Cost->requiresScalarEpilogue()) {
+    ScalarLatchBr->setCondition(
+        Builder.getInt1(ScalarLatchBr->getSuccessor(0) == LoopExitBlock));
+  }
+
   if (VectorizedLoopID.hasValue()) {
     L->setLoopID(VectorizedLoopID.getValue());
@@ -3691,7 +3699,7 @@
   // loop iterations are now distributed among them. Note that original loop
   // represented by LoopScalarBody becomes remainder loop after vectorization.
   //
-  // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
+  // For cases like foldTailByMasking() and requiresScalarEpilogue() we may
   // end up getting slightly roughened result but that should be OK since
   // profile is not inherently precise anyway. Note also possible bypass of
   // vector code caused by legality checks is ignored, assigning all the weight
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
@@ -93,7 +93,7 @@
 ; CHECK-NEXT:    [[SUB127]] = fsub fast double [[DVAL1_4131]], [[MUL126]]
 ; CHECK-NEXT:    [[INC129]] = add nuw nsw i32 [[I_2132]], 1
 ; CHECK-NEXT:    [[EXITCOND143:%.*]] = icmp eq i32 [[INC129]], [[T]]
-; CHECK-NEXT:    br i1 [[EXITCOND143]], label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
+; CHECK-NEXT:    br i1 true, label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
 ; CHECK:       outerend:
 ; CHECK-NEXT:    [[SUB127_LCSSA:%.*]] = phi double [ [[SUB127]], [[INNERLOOP]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[CONV138:%.*]] = fptosi double [[SUB127_LCSSA]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -79,7 +79,7 @@
 ; CHECK-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[B_PTR]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2
+; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[LOOP]], !llvm.loop !2
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:    store <2 x i16*> , <2 x i16*>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 2, 2
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[BB3:%.*]], label [[SCALAR_PH]]
@@ -44,10 +44,10 @@
 ; CHECK-NEXT:    [[_TMP4:%.*]] = bitcast %rec8* [[_TMP2]] to i16*
 ; CHECK-NEXT:    [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64
 ; CHECK-NEXT:    [[_TMP7:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[_TMP6]]
-; CHECK-NEXT:    store i16* [[_TMP4]], i16** [[_TMP7]]
+; CHECK-NEXT:    store i16* [[_TMP4]], i16** [[_TMP7]], align 8
 ; CHECK-NEXT:    [[_TMP9]] = add nsw i16 [[C_1_0]], 1
 ; CHECK-NEXT:    [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2
-; CHECK-NEXT:    br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop !2
+; CHECK-NEXT:    br i1 false, label [[BB2]], label [[BB3]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -39,19 +39,18 @@
 ; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
 ; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; UNROLL:       for.body:
-; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
-; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128
 ; UNROLL-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; UNROLL-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100
-; UNROLL-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; UNROLL:       if.then:
 ; UNROLL-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP9]], 20
 ; UNROLL-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
 ; UNROLL-NEXT:    br label [[FOR_INC]]
 ; UNROLL:       for.inc:
-; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; UNROLL-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1
 ; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
-; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; UNROLL-NEXT:    br label [[FOR_END]]
 ; UNROLL:       for.end:
 ; UNROLL-NEXT:    ret i32 0
 ;
@@ -104,7 +103,7 @@
 ; UNROLL-NOSIMPLIFY:       for.inc:
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; UNROLL-NOSIMPLIFY:       for.end:
 ; UNROLL-NOSIMPLIFY-NEXT:    ret i32 0
 ;
@@ -144,19 +143,18 @@
 ; VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
 ; VEC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; VEC:       for.body:
-; VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
-; VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128
 ; VEC-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; VEC-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 100
-; VEC-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; VEC-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; VEC:       if.then:
 ; VEC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], 20
 ; VEC-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
 ; VEC-NEXT:    br label [[FOR_INC]]
 ; VEC:       for.inc:
-; VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; VEC-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1
 ; VEC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
-; VEC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; VEC-NEXT:    br label [[FOR_END]]
 ; VEC:       for.end:
 ; VEC-NEXT:    ret i32 0
 ;
@@ -196,24 +194,7 @@
 ; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
 ; UNROLL-NEXT:    call void @llvm.assume(i1 [[TMP0]])
-; UNROLL-NEXT:    br label [[FOR_BODY14:%.*]]
-; UNROLL:       for.body14:
-; UNROLL-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
-; UNROLL-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
-; UNROLL-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
-; UNROLL-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
-; UNROLL-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
-; UNROLL:       if.then18:
-; UNROLL-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
-; UNROLL-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
-; UNROLL-NEXT:    br label [[FOR_INC23]]
-; UNROLL:       for.inc23:
-; UNROLL-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
-; UNROLL-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
-; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
-; UNROLL-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; UNROLL-NEXT:    call void @llvm.assume(i1 [[CMP13]])
-; UNROLL-NEXT:    br label [[FOR_BODY14]]
+; UNROLL-NEXT:    unreachable
 ;
 ; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
 ; UNROLL-NOSIMPLIFY-NEXT:  entry:
@@ -279,7 +260,7 @@
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
 ; UNROLL-NOSIMPLIFY-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 false, label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]]
 ; UNROLL-NOSIMPLIFY:       for.inc26.loopexit:
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_INC26]]
@@ -291,24 +272,7 @@
 ; VEC-NEXT:  entry:
 ; VEC-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
 ; VEC-NEXT:    call void @llvm.assume(i1 [[TMP0]])
-; VEC-NEXT:    br label [[FOR_BODY14:%.*]]
-; VEC:       for.body14:
-; VEC-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
-; VEC-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
-; VEC-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
-; VEC-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
-; VEC-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
-; VEC:       if.then18:
-; VEC-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
-; VEC-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
-; VEC-NEXT:    br label [[FOR_INC23]]
-; VEC:       for.inc23:
-; VEC-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
-; VEC-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
-; VEC-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
-; VEC-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; VEC-NEXT:    call void @llvm.assume(i1 [[CMP13]])
-; VEC-NEXT:    br label [[FOR_BODY14]]
+; VEC-NEXT:    unreachable
 ;
 entry:
   br label %for.body9
@@ -371,26 +335,24 @@
 ; UNROLL:       pred.store.continue6:
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
-; UNROLL-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
+; UNROLL-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; UNROLL:       middle.block:
 ; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
 ; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; UNROLL:       for.body:
-; UNROLL-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; UNROLL-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef
 ; UNROLL-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
-; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; UNROLL:       if.then:
 ; UNROLL-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
 ; UNROLL-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
 ; UNROLL-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
 ; UNROLL-NEXT:    br label [[FOR_INC]]
 ; UNROLL:       for.inc:
-; UNROLL-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
-; UNROLL-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; UNROLL-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 undef, 1
+; UNROLL-NEXT:    [[TMP7:%.*]] = add i64 undef, -1
 ; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
-; UNROLL-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
+; UNROLL-NEXT:    br label [[FOR_END]]
 ; UNROLL:       for.end:
 ; UNROLL-NEXT:    ret void
 ;
@@ -449,7 +411,7 @@
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]]
 ; UNROLL-NOSIMPLIFY:       for.end:
 ; UNROLL-NOSIMPLIFY-NEXT:    ret void
 ;
@@ -489,26 +451,24 @@
 ; VEC:       pred.store.continue3:
 ; VEC-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; VEC-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
-; VEC-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
+; VEC-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; VEC:       middle.block:
 ; VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
 ; VEC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; VEC:       for.body:
-; VEC-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; VEC-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef
 ; VEC-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
-; VEC-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; VEC-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; VEC:       if.then:
 ; VEC-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
 ; VEC-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
 ; VEC-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
 ; VEC-NEXT:    br label [[FOR_INC]]
 ; VEC:       for.inc:
-; VEC-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
-; VEC-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; VEC-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 undef, 1
+; VEC-NEXT:    [[TMP7:%.*]] = add i64 undef, -1
 ; VEC-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
-; VEC-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
+; VEC-NEXT:    br label [[FOR_END]]
 ; VEC:       for.end:
 ; VEC-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -82,7 +82,7 @@
 ; CHECK-NEXT:    store i8 7, i8* [[AJP3]], align 8
 ; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i32 [[J]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
+; CHECK-NEXT:    br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
--- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
@@ -52,7 +52,7 @@
 ; CHECK-NEXT:    store i16 [[TMP18]], i16* @v_39, align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
-; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 12, 12
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -75,7 +75,7 @@
 ; CHECK-NEXT:    store i16 [[COND6]], i16* @v_39, align 1
 ; CHECK-NEXT:    [[INC7]] = add nsw i16 [[I_07]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[INC7]], 111
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop !2
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RV:%.*]] = load i16, i16* @v_39, align 1
 ; CHECK-NEXT:    ret i16 [[RV]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr44547.ll b/llvm/test/Transforms/LoopVectorize/pr44547.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr44547.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -loop-vectorize -simplifycfg -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @single_iter_remainder(
+define void @single_iter_remainder(i16* noalias nocapture readonly %a, i16* noalias nocapture readonly %b, i16* noalias nocapture %c, i32 %n) {
+entry:
+  %cmp7 = icmp eq i32 %n, 0
+  br i1 %cmp7, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+;CHECK: vector.body:
+;CHECK: for.body:
+;CHECK: br label %for.cond.cleanup
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ]
+  %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ]
+  %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.010, i64 1
+  %0 = load i16, i16* %a.addr.010, align 2
+  %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1
+  %1 = load i16, i16* %b.addr.08, align 2
+  %add = add i16 %1, %0
+  %incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1
+  store i16 %add, i16* %c.addr.09, align 2
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+;CHECK-LABEL: @single_iter_remainder_checks(
+define void @single_iter_remainder_checks(i16* %a, i16* %b, i16* %c, i32 %n) {
+entry:
+  %cmp7 = icmp eq i32 %n, 0
+  br i1 %cmp7, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+;CHECK: vector.body:
+;CHECK: for.body:
+;CHECK-NOT: br label %for.cond.cleanup
+;CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ]
+  %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ]
+  %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.010, i64 1
+  %0 = load i16, i16* %a.addr.010, align 2
+  %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1
+  %1 = load i16, i16* %b.addr.08, align 2
+  %add = add i16 %1, %0
+  %incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1
+  store i16 %add, i16* %c.addr.09, align 2
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}