diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2652,6 +2652,8 @@ bool hasVF(ElementCount VF) { return VFs.count(VF); } + bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } + const std::string &getName() const { return Name; } void setName(const Twine &newName) { Name = newName.str(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -125,6 +125,7 @@ } } + bool ScalarVFOnly = Plan.hasScalarVFOnly(); // Try to sink each replicate recipe in the worklist. while (!WorkList.empty()) { VPBasicBlock *SinkTo; @@ -135,8 +136,8 @@ SinkCandidate = ScalarSteps; else { auto *RepR = dyn_cast_or_null<VPReplicateRecipe>(C->Def); - if (!RepR || RepR->isUniform() || RepR->mayHaveSideEffects() || - RepR->mayReadOrWriteMemory()) + if (!RepR || (!ScalarVFOnly && RepR->isUniform()) || + RepR->mayHaveSideEffects() || RepR->mayReadOrWriteMemory()) continue; SinkCandidate = RepR; } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -3325,38 +3325,38 @@ ; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE10]] ] ; UNROLL-NO-VF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE10]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] -; UNROLL-NO-VF-NEXT: [[INDUCTION4:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION5:%.*]] = add i32 [[OFFSET_IDX]], -1 +; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0 +; 
UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-VF-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[VEC_IV4:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]] -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], [[TRIP_COUNT_MINUS_1]] +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV4]], [[TRIP_COUNT_MINUS_1]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.udiv.if: -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION4]] +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]] ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-VF: pred.udiv.continue: ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] -; UNROLL-NO-VF: pred.udiv.if7: -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION5]] -; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE8]] -; UNROLL-NO-VF: pred.udiv.continue8: -; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF7]] ] +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] +; UNROLL-NO-VF: pred.udiv.if5: +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION3]] +; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE6]] +; UNROLL-NO-VF: pred.udiv.continue6: +; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF5]] ] ; UNROLL-NO-VF-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]] ; UNROLL-NO-VF-NEXT: [[TMP9]] = add i32 [[VEC_PHI2]], [[TMP5]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label 
[[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.store.if: -; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION4]], i32* [[TMP10]], align 4 +; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i32 [[INDEX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION7]] +; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION]], i32* [[TMP10]], align 4 ; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NO-VF: pred.store.continue: -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] -; UNROLL-NO-VF: pred.store.if9: -; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION3]] -; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION5]], i32* [[TMP11]], align 4 +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE10]] +; UNROLL-NO-VF: pred.store.if8: +; UNROLL-NO-VF-NEXT: [[INDUCTION9:%.*]] = add i32 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION9]] +; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION3]], i32* [[TMP11]], align 4 ; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE10]] ; UNROLL-NO-VF: pred.store.continue10: ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -72,12 +72,9 @@ ; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; DBG-NEXT: vp<[[TRANSFORMED_IV:%.+]]> = TRANSFORMED-IV vp<[[CAN_IV]]>, ir, ir ; 
DBG-NEXT: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[TRANSFORMED_IV]]> -; DBG-NEXT: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> ; DBG-NEXT: Successor(s): cond.false ; DBG-EMPTY: ; DBG-NEXT: cond.false: -; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> -; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> ; DBG-NEXT: Successor(s): cond.false.0 ; DBG-EMPTY: ; DBG-NEXT: cond.false.0: @@ -89,7 +86,10 @@ ; DBG-NEXT: Successor(s): pred.store.if, pred.store.continue ; DBG-EMPTY: ; DBG-NEXT: pred.store.if: +; DBG-NEXT: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> +; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE ir<%l> = load ir<%gep.src> +; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE store ir<%l>, ir<%gep.dst> ; DBG-NEXT: Successor(s): pred.store.continue ; DBG-EMPTY: @@ -116,34 +116,34 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) { ; CHECK-LABEL: @test_scalarize_with_branch_cond( ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue8 ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i1 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP1]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add i1 [[OFFSET_IDX]], false -; CHECK-NEXT: [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true -; CHECK-NEXT: br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue +; CHECK-NEXT: [[INDUCTION4:%.*]] = add i1 [[OFFSET_IDX]], true +; CHECK-NEXT: br i1 [[INDUCTION]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[INDUCTION4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION4]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION4]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 -; CHECK-NEXT: br label %pred.store.continue +; CHECK-NEXT: [[INDUCTION5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION5]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]] +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP4]], %pred.store.if ] -; CHECK-NEXT: br i1 [[INDUCTION3]], label %pred.store.if7, label %pred.store.continue8 -; CHECK: pred.store.if7: -; CHECK-NEXT: [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION5]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP3]], [[PRED_STORE_IF]] ] +; CHECK-NEXT: br i1 [[INDUCTION4]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.if6: +; CHECK-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION7]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP2]], align 4 -; CHECK-NEXT: br label %pred.store.continue8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION7]] +; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ poison, %pred.store.continue ], [ 
[[TMP7]], %pred.store.if7 ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP7]], [[PRED_STORE_IF6]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label %vector.body, !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -141,13 +141,13 @@ ; VF1UF4: vector.body: ; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] ; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0 -; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i32 [[INDEX]], 1 -; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i32 [[INDEX]], 2 -; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[VEC_IV1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[VEC_IV2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[VEC_IV3:%.*]] = add i32 [[INDEX]], 3 ; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[VEC_IV]], 13 -; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV4]], 13 -; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV5]], 13 -; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV1]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV2]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV3]], 13 ; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: ; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 @@ -155,24 +155,24 @@ ; VF1UF4-NEXT: 
store i32 13, i32* [[TMP4]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: -; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; VF1UF4: pred.store.if7: -; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; VF1UF4: pred.store.if4: +; VF1UF4-NEXT: [[INDUCTION5:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION5]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] -; VF1UF4: pred.store.continue8: -; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; VF1UF4: pred.store.if9: -; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 -; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] +; VF1UF4: pred.store.continue6: +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; VF1UF4: pred.store.if7: +; VF1UF4-NEXT: [[INDUCTION8:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION8]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] -; VF1UF4: pred.store.continue10: -; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.if11: -; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.continue9: +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF10:%.*]], label 
[[PRED_STORE_CONTINUE12]] +; VF1UF4: pred.store.if10: +; VF1UF4-NEXT: [[INDUCTION11:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION11]] ; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] ; VF1UF4: pred.store.continue12: @@ -358,13 +358,13 @@ ; VF1UF4: vector.body: ; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] ; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 -; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 -; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 -; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3 +; VF1UF4-NEXT: [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1 +; VF1UF4-NEXT: [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2 +; VF1UF4-NEXT: [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3 ; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 13 -; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 13 -; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 13 -; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], 13 ; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: ; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 @@ -374,33 +374,33 @@ ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: ; VF1UF4-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ] -; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; VF1UF4: pred.store.if7: -; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 -; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION1]] +; VF1UF4-NEXT: br i1 [[TMP1]], label 
[[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; VF1UF4: pred.store.if4: +; VF1UF4-NEXT: [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION5]] ; VF1UF4-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 ; VF1UF4-NEXT: store i64 [[TMP8]], i64* [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] -; VF1UF4: pred.store.continue8: -; VF1UF4-NEXT: [[TMP9:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP8]], [[PRED_STORE_IF7]] ] -; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; VF1UF4: pred.store.if9: -; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 -; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION2]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] +; VF1UF4: pred.store.continue6: +; VF1UF4-NEXT: [[TMP9:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP8]], [[PRED_STORE_IF4]] ] +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; VF1UF4: pred.store.if7: +; VF1UF4-NEXT: [[INDUCTION8:%.*]] = add i64 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION8]] ; VF1UF4-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 ; VF1UF4-NEXT: store i64 [[TMP11]], i64* [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] -; VF1UF4: pred.store.continue10: -; VF1UF4-NEXT: [[TMP12:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE8]] ], [ [[TMP11]], [[PRED_STORE_IF9]] ] -; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.if11: -; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION3]] +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.continue9: +; VF1UF4-NEXT: [[TMP12:%.*]] = phi i64 [ 
poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP11]], [[PRED_STORE_IF7]] ] +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE12]] +; VF1UF4: pred.store.if10: +; VF1UF4-NEXT: [[INDUCTION11:%.*]] = add i64 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION11]] ; VF1UF4-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 ; VF1UF4-NEXT: store i64 [[TMP14]], i64* [[B]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] ; VF1UF4: pred.store.continue12: -; VF1UF4-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE10]] ], [ [[TMP14]], [[PRED_STORE_IF11]] ] +; VF1UF4-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP14]], [[PRED_STORE_IF10]] ] ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -18,13 +18,13 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] ; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 14 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 14 -; CHECK-NEXT: 
[[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 14 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], 14 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], 14 ; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 @@ -32,24 +32,24 @@ ; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if7: -; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION1]] +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK: pred.store.if4: +; CHECK-NEXT: [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION5]] ; CHECK-NEXT: store i32 0, i32* [[TMP5]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue8: -; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; CHECK: pred.store.if9: -; CHECK-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION2]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; CHECK: pred.store.if7: +; CHECK-NEXT: [[INDUCTION8:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION8]] ; CHECK-NEXT: store i32 0, i32* [[TMP6]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] -; 
CHECK: pred.store.continue10: -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.if11: -; CHECK-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION3]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]] +; CHECK: pred.store.continue9: +; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE12]] +; CHECK: pred.store.if10: +; CHECK-NEXT: [[INDUCTION11:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION11]] ; CHECK-NEXT: store i32 0, i32* [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -20,8 +20,6 @@ ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: CLONE ir<%tmp3> = load ir<%tmp2> ; CHECK-NEXT: CLONE store ir<0>, ir<%tmp2> -; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3> -; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4> ; CHECK-NEXT: Successor(s): if.then ; CHECK: if.then: @@ -33,6 +31,8 @@ ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK: pred.store.if: +; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3> +; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4> ; CHECK-NEXT: CLONE store ir<%tmp5>, ir<%tmp2> ; CHECK-NEXT: Successor(s): pred.store.continue