diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3102,6 +3102,21 @@
   Optional<MDNode *> VectorizedLoopID =
       makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
                                       LLVMLoopVectorizeFollowupVectorized});
+  // When VF * UF == 2, force the latch of the original (scalar) loop to take
+  // its exit edge: the branch condition becomes the constant that selects
+  // LoopExitBlock, so the loop body runs once and never iterates again.
+  // NOTE(review): this presumes the scalar loop is only ever entered with at
+  // most one iteration left; confirm that holds when it is reached through a
+  // runtime-check bypass or when a scalar epilogue is required.
+  if (VF * UF == 2) {
+    if (BasicBlock *Latch = OrigLoop->getLoopLatch())
+      if (auto *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+        // setCondition() is only meaningful on a conditional branch.
+        if (BI->isConditional())
+          BI->setCondition(
+              Builder.getInt1(BI->getSuccessor(0) == LoopExitBlock));
+  }
+
   if (VectorizedLoopID.hasValue()) {
     Lp->setLoopID(VectorizedLoopID.getValue());
 
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
@@ -96,7 +96,7 @@
 ; CHECK-NEXT: [[SUB127]] = fsub fast double [[DVAL1_4131]], [[MUL126]]
 ; CHECK-NEXT: [[INC129]] = add nuw nsw i32 [[I_2132]], 1
 ; CHECK-NEXT: [[EXITCOND143:%.*]] = icmp eq i32 [[INC129]], [[T]]
-; CHECK-NEXT: br i1 [[EXITCOND143]], label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
+; CHECK-NEXT: br i1 true, label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
 ; CHECK: outerend:
 ; CHECK-NEXT: [[SUB127_LCSSA:%.*]] = phi double [ [[SUB127]], [[INNERLOOP]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: [[CONV138:%.*]] = fptosi double [[SUB127_LCSSA]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -80,7 +80,7 @@
 ; CHECK-NEXT: store i32 [[SCALAR_RECUR]], i32* [[B_PTR]], align 4
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: 
[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2 +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LOOP]], !llvm.loop !2 ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -39,19 +39,18 @@ ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]] +; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128 ; UNROLL-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; UNROLL-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100 -; UNROLL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; UNROLL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; UNROLL: if.then: ; UNROLL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 20 ; UNROLL-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 ; UNROLL-NEXT: br label [[FOR_INC]] ; UNROLL: for.inc: -; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; UNROLL-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; UNROLL-NEXT: br label [[FOR_END]] ; UNROLL: for.end: ; UNROLL-NEXT: ret i32 0 ; @@ -104,7 +103,7 @@ ; UNROLL-NOSIMPLIFY: for.inc: ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 
[[INDVARS_IV_NEXT]], 128 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret i32 0 ; @@ -147,19 +146,18 @@ ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; VEC: for.body: -; VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ] -; VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]] +; VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128 ; VEC-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; VEC-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 100 -; VEC-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; VEC-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; VEC: if.then: ; VEC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], 20 ; VEC-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 ; VEC-NEXT: br label [[FOR_INC]] ; VEC: for.inc: -; VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1 ; VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; VEC-NEXT: br label [[FOR_END]] ; VEC: for.end: ; VEC-NEXT: ret i32 0 ; @@ -199,24 +197,7 @@ ; UNROLL-NEXT: entry: ; UNROLL-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true ; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP0]]) -; UNROLL-NEXT: br label [[FOR_BODY14:%.*]] -; UNROLL: for.body14: -; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ] -; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ] -; UNROLL-NEXT: 
[[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]] -; UNROLL-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 -; UNROLL-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]] -; UNROLL: if.then18: -; UNROLL-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4 -; UNROLL-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1 -; UNROLL-NEXT: br label [[FOR_INC23]] -; UNROLL: for.inc23: -; UNROLL-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ] -; UNROLL-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 -; UNROLL-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 -; UNROLL-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; UNROLL-NEXT: call void @llvm.assume(i1 [[CMP13]]) -; UNROLL-NEXT: br label [[FOR_BODY14]] +; UNROLL-NEXT: unreachable ; ; UNROLL-NOSIMPLIFY-LABEL: @bug18724( ; UNROLL-NOSIMPLIFY-NEXT: entry: @@ -282,7 +263,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop !4 +; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop !4 ; UNROLL-NOSIMPLIFY: for.inc26.loopexit: ; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]] @@ -294,24 +275,7 @@ ; VEC-NEXT: entry: ; VEC-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true ; VEC-NEXT: call void @llvm.assume(i1 [[TMP0]]) -; VEC-NEXT: br label [[FOR_BODY14:%.*]] -; VEC: for.body14: -; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ] -; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ 
[[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ] -; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]] -; VEC-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 -; VEC-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]] -; VEC: if.then18: -; VEC-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4 -; VEC-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1 -; VEC-NEXT: br label [[FOR_INC23]] -; VEC: for.inc23: -; VEC-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ] -; VEC-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 -; VEC-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 -; VEC-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; VEC-NEXT: call void @llvm.assume(i1 [[CMP13]]) -; VEC-NEXT: br label [[FOR_BODY14]] +; VEC-NEXT: unreachable ; entry: br label %for.body9 @@ -374,26 +338,24 @@ ; UNROLL: pred.store.continue6: ; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 +; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !2 ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] +; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef ; UNROLL-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 -; UNROLL-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; UNROLL-NEXT: br i1 [[C]], label 
[[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; UNROLL: if.then: ; UNROLL-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; UNROLL-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; UNROLL-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 ; UNROLL-NEXT: br label [[FOR_INC]] ; UNROLL: for.inc: -; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 -; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 +; UNROLL-NEXT: [[TMP6:%.*]] = add nuw nsw i64 undef, 1 +; UNROLL-NEXT: [[TMP7:%.*]] = add i64 undef, -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4 +; UNROLL-NEXT: br label [[FOR_END]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -452,7 +414,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !6 +; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !6 ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; @@ -498,26 +460,24 @@ ; VEC: pred.store.continue8: ; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !2 ; VEC: middle.block: ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; VEC: for.body: -; VEC-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ] -; VEC-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ] -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, 
i8* undef, i64 undef ; VEC-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 -; VEC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; VEC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; VEC: if.then: ; VEC-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; VEC-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; VEC-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 ; VEC-NEXT: br label [[FOR_INC]] ; VEC: for.inc: -; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 -; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 +; VEC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 undef, 1 +; VEC-NEXT: [[TMP7:%.*]] = add i64 undef, -1 ; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5 +; VEC-NEXT: br label [[FOR_END]] ; VEC: for.end: ; VEC-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/pr44547.ll b/llvm/test/Transforms/LoopVectorize/pr44547.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr44547.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -loop-vectorize -simplifycfg -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +;CHECK-LABEL: @single_iter_remainder( +define void @single_iter_remainder(i16* noalias nocapture readonly %a, i16* noalias nocapture readonly %b, i16* noalias nocapture %c, i32 %n) { +entry: + %cmp7 = icmp eq i32 %n, 0 + br i1 %cmp7, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +;CHECK: vector.body: +;CHECK: for.body: +;CHECK: br label %for.cond.cleanup +for.body: ; preds = %entry, %for.body + %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ] + %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ] + %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ] + %incdec.ptr = 
getelementptr inbounds i16, i16* %a.addr.010, i64 1 + %0 = load i16, i16* %a.addr.010, align 2 + %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1 + %1 = load i16, i16* %b.addr.08, align 2 + %add = add i16 %1, %0 + %incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1 + store i16 %add, i16* %c.addr.09, align 2 + %inc = add nuw nsw i32 %i.011, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +}