diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3595,6 +3595,16 @@ Optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupVectorized}); + + // If VFxUF is 2 and vector loop is not skipped then remainder executes once. + if (!VF.isScalable() && VF.getKnownMinValue() * UF == 2 && + isa<BranchInst>(ScalarLatchTerm) && !areSafetyChecksAdded() && + !Cost->requiresScalarEpilogue(VF)) { + auto *ScalarLatchBr = cast<BranchInst>(ScalarLatchTerm); + ScalarLatchBr->setCondition( + Builder.getInt1(ScalarLatchBr->getSuccessor(0) == LoopExitBlock)); + } + if (VectorizedLoopID.hasValue()) { L->setLoopID(VectorizedLoopID.getValue()); @@ -4102,15 +4112,15 @@ // loop iterations are now distributed among them. Note that original loop // represented by LoopScalarBody becomes remainder loop after vectorization. // - // For cases like foldTailByMasking() and requiresScalarEpiloque() we may + // For cases like foldTailByMasking() and requiresScalarEpilogue() we may // end up getting slightly roughened result but that should be OK since // profile is not inherently precise anyway. Note also possible bypass of // vector code caused by legality checks is ignored, assigning all the weight // to the vector loop, optimistically. // - // For scalable vectorization we can't know at compile time how many iterations - // of the loop are handled in one vector iteration, so instead assume a pessimistic - // vscale of '1'. + // For scalable vectorization we can't know at compile time how many + // iterations of the loop are handled in one vector iteration, so instead + // assume a pessimistic vscale of '1'. 
setProfileInfoAfterUnrolling( LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody), LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF); diff --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll @@ -91,7 +91,7 @@ ; CHECK-NEXT: [[SUB127]] = fsub fast double [[DVAL1_4131]], [[MUL126]] ; CHECK-NEXT: [[INC129]] = add nuw nsw i32 [[I_2132]], 1 ; CHECK-NEXT: [[EXITCOND143:%.*]] = icmp eq i32 [[INC129]], [[T]] -; CHECK-NEXT: br i1 [[EXITCOND143]], label [[OUTEREND]], label [[INNERLOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 true, label [[OUTEREND]], label [[INNERLOOP]], [[LOOP2:!llvm.loop !.*]] ; CHECK: outerend: ; CHECK-NEXT: [[SUB127_LCSSA:%.*]] = phi double [ [[SUB127]], [[INNERLOOP]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[CONV138:%.*]] = fptosi double [[SUB127_LCSSA]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -243,7 +243,7 @@ ; VF-TWO-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] +; VF-TWO-CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit.loopexit: ; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] ; VF-TWO-CHECK: for.end.loopexit: @@ -497,6 +497,7 @@ ; + entry: %cmp1 = icmp 
sgt i32 %N, 0 br i1 %cmp1, label %for.body.preheader, label %for.end @@ -749,7 +750,7 @@ ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; VF-TWO-CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; VF-TWO-CHECK: for.end.loopexit.loopexit: ; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] ; VF-TWO-CHECK: for.end.loopexit: @@ -990,6 +991,7 @@ ; + entry: %cmp1 = icmp sgt i32 %n, 1 br i1 %cmp1, label %for.body.preheader, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -79,7 +79,7 @@ ; CHECK-NEXT: store i32 [[SCALAR_RECUR]], i32* [[B_PTR]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -47,7 +47,7 @@ ; CHECK-NEXT: store i16* [[_TMP4]], i16** [[_TMP7]], align 8 ; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1 ; CHECK-NEXT: 
[[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2 -; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[BB2]], label [[BB3]], [[LOOP2:!llvm.loop !.*]] ; CHECK: bb3: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -276,9 +276,7 @@ ; VEC1_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 ; VEC1_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] ; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] +; VEC1_INTERL2-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] ; VEC1_INTERL2: for.end.loopexit: ; VEC1_INTERL2-NEXT: br label [[FOR_END]] ; VEC1_INTERL2: for.end: @@ -324,15 +322,11 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] ; VEC2_INTERL1_PRED_STORE: for.body: -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 
-; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[BC_RESUME_VAL]] +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[BC_RESUME_VAL1]], float* [[ARRAYIDX]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: br label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: for.end: ; VEC2_INTERL1_PRED_STORE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -39,19 +39,18 @@ ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]] +; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128 ; UNROLL-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; UNROLL-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100 -; UNROLL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; UNROLL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; 
UNROLL: if.then: ; UNROLL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 20 ; UNROLL-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 ; UNROLL-NEXT: br label [[FOR_INC]] ; UNROLL: for.inc: -; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; UNROLL-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; UNROLL-NEXT: br label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret i32 0 ; @@ -104,7 +103,7 @@ ; UNROLL-NOSIMPLIFY: for.inc: ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret i32 0 ; @@ -144,22 +143,22 @@ ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128 ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; VEC: for.body: -; VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ] -; VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]] +; VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128 ; VEC-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; VEC-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 100 -; VEC-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; VEC-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; VEC: if.then: ; VEC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], 20 ; VEC-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 ; VEC-NEXT: br label [[FOR_INC]] ; VEC: 
for.inc: -; VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1 ; VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; VEC-NEXT: br label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; VEC: for.end: ; VEC-NEXT: ret i32 0 ; + entry: br label %for.body @@ -201,62 +200,37 @@ ; UNROLL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; UNROLL: vector.ph: +; UNROLL-NEXT: [[TMP4:%.*]] = xor i1 [[MIN_ITERS_CHECK]], true +; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP4]]) ; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; UNROLL-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]] ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: -; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] -; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ] -; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ] +; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] +; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ] +; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ] ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]] ; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0 ; UNROLL-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1 -; UNROLL-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]] -; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]] -; UNROLL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]] +; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]] ; UNROLL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4 +; UNROLL-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP6]], align 4 ; UNROLL-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE4]] ; UNROLL: pred.store.if: -; UNROLL-NEXT: store i32 2, i32* [[TMP4]], align 4 +; UNROLL-NEXT: store i32 2, i32* [[TMP5]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE4]] ; UNROLL: pred.store.continue4: -; UNROLL-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI]], 1 -; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[VEC_PHI2]], 1 -; UNROLL-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP8]] -; UNROLL-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP9]] +; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[VEC_PHI]], 1 +; UNROLL-NEXT: [[TMP10:%.*]] = add i32 [[VEC_PHI2]], 1 +; UNROLL-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP9]] +; UNROLL-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP10]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; UNROLL: middle.block: -; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]] -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] -; UNROLL-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true -; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP11]]) -; UNROLL-NEXT: br label [[SCALAR_PH]] 
-; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ] -; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: br label [[FOR_BODY14:%.*]] -; UNROLL: for.body14: -; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; UNROLL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]] -; UNROLL-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 -; UNROLL-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]] -; UNROLL: if.then18: -; UNROLL-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4 -; UNROLL-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1 -; UNROLL-NEXT: br label [[FOR_INC23]] -; UNROLL: for.inc23: -; UNROLL-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ] -; UNROLL-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 -; UNROLL-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 -; UNROLL-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; UNROLL-NEXT: call void @llvm.assume(i1 [[CMP13]]) -; UNROLL-NEXT: br label [[FOR_BODY14]] +; UNROLL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: [[TMP12:%.*]] = xor i1 [[TMP11]], true +; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP12]]) +; UNROLL-NEXT: br label [[VECTOR_BODY]] ; ; UNROLL-NOSIMPLIFY-LABEL: @bug18724( ; UNROLL-NOSIMPLIFY-NEXT: entry: @@ -326,7 +300,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 
[[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.inc26.loopexit: ; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]] @@ -343,59 +317,35 @@ ; VEC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 -; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; VEC: vector.ph: +; VEC-NEXT: [[TMP4:%.*]] = xor i1 [[MIN_ITERS_CHECK]], true +; VEC-NEXT: call void @llvm.assume(i1 [[TMP4]]) ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; VEC-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]] ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: -; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ] +; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]] -; VEC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[TMP4]] -; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 -; VEC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* -; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4 +; VEC-NEXT: [[TMP5:%.*]] = add i64 
[[OFFSET_IDX]], 0 +; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[TMP5]] +; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0 +; VEC-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>* +; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP8]], align 4 ; VEC-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: -; VEC-NEXT: store i32 2, i32* [[TMP5]], align 4 +; VEC-NEXT: store i32 2, i32* [[TMP6]], align 4 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: -; VEC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_PHI]], -; VEC-NEXT: [[PREDPHI]] = select <2 x i1> undef, <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP8]] +; VEC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[VEC_PHI]], +; VEC-NEXT: [[PREDPHI]] = select <2 x i1> undef, <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP9]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VEC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; VEC: middle.block: -; VEC-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI]]) -; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] -; VEC-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true +; VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true ; VEC-NEXT: call void @llvm.assume(i1 [[TMP11]]) -; VEC-NEXT: br label [[SCALAR_PH]] -; VEC: scalar.ph: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ] -; VEC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; VEC-NEXT: br label [[FOR_BODY14:%.*]] -; VEC: for.body14: -; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; VEC-NEXT: 
[[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]] -; VEC-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 -; VEC-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]] -; VEC: if.then18: -; VEC-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4 -; VEC-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1 -; VEC-NEXT: br label [[FOR_INC23]] -; VEC: for.inc23: -; VEC-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ] -; VEC-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 -; VEC-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 -; VEC-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0 -; VEC-NEXT: call void @llvm.assume(i1 [[CMP13]]) -; VEC-NEXT: br label [[FOR_BODY14]] +; VEC-NEXT: br label [[VECTOR_BODY]] ; + entry: br label %for.body9 @@ -457,26 +407,24 @@ ; UNROLL: pred.store.continue6: ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] +; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef ; UNROLL-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 -; UNROLL-NEXT: 
br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; UNROLL-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; UNROLL: if.then: ; UNROLL-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; UNROLL-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; UNROLL-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 ; UNROLL-NEXT: br label [[FOR_INC]] ; UNROLL: for.inc: -; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 -; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 +; UNROLL-NEXT: [[TMP6:%.*]] = add nuw nsw i64 undef, 1 +; UNROLL-NEXT: [[TMP7:%.*]] = add i64 undef, -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NEXT: br label [[FOR_END]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -535,7 +483,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; @@ -575,29 +523,28 @@ ; VEC: pred.store.continue3: ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VEC: middle.block: ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; VEC: for.body: -; VEC-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ] -; VEC-NEXT: 
[[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ] -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef ; VEC-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 -; VEC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; VEC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; VEC: if.then: ; VEC-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; VEC-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; VEC-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 ; VEC-NEXT: br label [[FOR_INC]] ; VEC: for.inc: -; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 -; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 +; VEC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 undef, 1 +; VEC-NEXT: [[TMP7:%.*]] = add i64 undef, -1 ; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VEC-NEXT: br label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]] ; VEC: for.end: ; VEC-NEXT: ret void ; + entry: br label %for.body @@ -627,25 +574,19 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, i8* %ptr) { ; UNROLL-LABEL: @minimal_bit_widths_with_aliasing_store( ; UNROLL-NEXT: entry: -; UNROLL-NEXT: br label [[FOR_BODY:%.*]] -; UNROLL: for.body: -; UNROLL-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] -; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 [[TMP0]] +; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 0 ; UNROLL-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 ; UNROLL-NEXT: store i8 0, i8* [[TMP2]], align 1 -; UNROLL-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; UNROLL-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; UNROLL: if.then: ; UNROLL-NEXT: [[TMP4:%.*]] = zext i8 
[[TMP3]] to i32 ; UNROLL-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; UNROLL-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 ; UNROLL-NEXT: br label [[FOR_INC]] ; UNROLL: for.inc: -; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 -; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 +; UNROLL-NEXT: [[TMP6:%.*]] = add nuw nsw i64 0, 1 +; UNROLL-NEXT: [[TMP7:%.*]] = add i64 0, -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; UNROLL: for.end: ; UNROLL-NEXT: ret void ; ; UNROLL-NOSIMPLIFY-LABEL: @minimal_bit_widths_with_aliasing_store( @@ -706,31 +647,25 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; ; VEC-LABEL: @minimal_bit_widths_with_aliasing_store( ; VEC-NEXT: entry: -; VEC-NEXT: br label [[FOR_BODY:%.*]] -; VEC: for.body: -; VEC-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; VEC-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i64 0 ; VEC-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 ; VEC-NEXT: store i8 0, i8* [[TMP2]], align 1 -; VEC-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; VEC-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] ; VEC: if.then: ; VEC-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; VEC-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 ; VEC-NEXT: store i8 [[TMP5]], 
i8* [[TMP2]], align 1 ; VEC-NEXT: br label [[FOR_INC]] ; VEC: for.inc: -; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 -; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 +; VEC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 0, 1 +; VEC-NEXT: [[TMP7:%.*]] = add i64 0, -1 ; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; VEC: for.end: ; VEC-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -39,7 +39,7 @@ ; CHECK-NEXT: store i16 0, i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] ; CHECK: if.end: ; CHECK-NEXT: ret void ; @@ -95,7 +95,7 @@ ; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 ; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] +; TAILFOLD-NEXT: br i1 false, label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] ; TAILFOLD: if.end: ; TAILFOLD-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -67,7 +67,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -82,7 +82,7 @@ ; CHECK-NEXT: store i8 7, i8* [[AJP3]], align 8 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 +; CHECK-NEXT: br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll @@ -85,7 +85,7 @@ ; VF-TWO-CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]] +; VF-TWO-CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]] ; VF-TWO-CHECK: for.end.loopexit.loopexit: ; VF-TWO-CHECK-NEXT: [[ADD_LCSSA3:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -454,7 +454,7 @@ ; 
CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]] ; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit.loopexit: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_END_LOOPEXIT]] ; CHECK-PROFITABLE-BY-DEFAULT: for.end.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -74,7 +74,7 @@ ; CHECK-NEXT: store i16 [[COND6]], i16* @v_39, align 1 ; CHECK-NEXT: [[INC7]] = add nsw i16 [[I_07]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC7]], 111 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]] ; CHECK: exit: ; CHECK-NEXT: [[RV:%.*]] = load i16, i16* @v_39, align 1 ; CHECK-NEXT: ret i16 [[RV]] diff --git a/llvm/test/Transforms/LoopVectorize/pr44547.ll b/llvm/test/Transforms/LoopVectorize/pr44547.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr44547.ll @@ -0,0 +1,62 @@ +; RUN: opt -S -loop-vectorize -simplifycfg -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +;CHECK-LABEL: @single_iter_remainder( +define void @single_iter_remainder(i16* noalias nocapture readonly %a, i16* noalias nocapture readonly %b, i16* noalias nocapture %c, i32 %n) 
{ +entry: + %cmp7 = icmp eq i32 %n, 0 + br i1 %cmp7, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +;CHECK: vector.body: +;CHECK: for.body: +;CHECK: br label %for.cond.cleanup +for.body: ; preds = %entry, %for.body + %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ] + %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ] + %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ] + %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.010, i64 1 + %0 = load i16, i16* %a.addr.010, align 2 + %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1 + %1 = load i16, i16* %b.addr.08, align 2 + %add = add i16 %1, %0 + %incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1 + store i16 %add, i16* %c.addr.09, align 2 + %inc = add nuw nsw i32 %i.011, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +;CHECK-LABEL: @single_iter_remainder_checks( +define void @single_iter_remainder_checks(i16* %a, i16* %b, i16* %c, i32 %n) { +entry: + %cmp7 = icmp eq i32 %n, 0 + br i1 %cmp7, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +;CHECK: vector.body: +;CHECK: for.body: +;CHECK-NOT: br label %for.cond.cleanup +;CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body +for.body: ; preds = %entry, %for.body + %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ] + %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ] + %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ] + %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.010, i64 1 + %0 = load i16, i16* %a.addr.010, align 2 + %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1 + %1 = load i16, i16* %b.addr.08, align 2 + %add = add i16 %1, %0 + 
%incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1 + store i16 %add, i16* %c.addr.09, align 2 + %inc = add nuw nsw i32 %i.011, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -82,7 +82,7 @@ ; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1 ; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]] -; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOP_EXIT]], [[LOOP2:!llvm.loop !.*]] ; CHECK: loop.exit: ; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]] ; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -60,7 +60,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: 
[[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -130,7 +130,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -156,7 +156,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -220,7 +220,7 @@ ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -241,7 +241,7 @@ ; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult 
i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -322,7 +322,7 @@ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -348,7 +348,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 63 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP9:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -404,7 +404,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1000, 1000 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -421,7 +421,7 @@ ; CHECK-NEXT: store i32 [[P]], i32* [[GEP_PTR]], align 4 ; CHECK-NEXT: [[ADD_I]] = add nsw i32 [[P]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000 -; CHECK-NEXT: br 
i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP11:!llvm.loop !.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ;