diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2671,6 +2671,8 @@ bool hasVF(ElementCount VF) { return VFs.count(VF); } + bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } + const std::string &getName() const { return Name; } void setName(const Twine &newName) { Name = newName.str(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -123,6 +123,7 @@ } } + bool ScalarVFOnly = Plan.hasScalarVFOnly(); // Try to sink each replicate or scalar IV steps recipe in the worklist. while (!WorkList.empty()) { VPBasicBlock *SinkTo; @@ -133,7 +134,7 @@ SinkCandidate->mayReadOrWriteMemory()) continue; if (auto *RepR = dyn_cast(SinkCandidate)) { - if (RepR->isUniform()) + if (!ScalarVFOnly && RepR->isUniform()) continue; } else if (!isa(SinkCandidate)) continue; @@ -159,6 +160,8 @@ continue; if (NeedsDuplicating) { + if (ScalarVFOnly) + continue; Instruction *I = cast( cast(SinkCandidate)->getUnderlyingValue()); auto *Clone = diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -76,8 +76,6 @@ ; DBG-NEXT: Successor(s): cond.false ; DBG-EMPTY: ; DBG-NEXT: cond.false: -; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> -; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> ; DBG-NEXT: Successor(s): cond.false.0 ; DBG-EMPTY: ; DBG-NEXT: cond.false.0: @@ -89,7 +87,9 @@ ; DBG-NEXT: Successor(s): pred.store.if, pred.store.continue ; DBG-EMPTY: ; DBG-NEXT: pred.store.if: +; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE ir<%l> = load ir<%gep.src> +; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE store ir<%l>, ir<%gep.dst> ; DBG-NEXT: Successor(s): pred.store.continue ; DBG-EMPTY: @@ -124,9 +124,9 @@ ; CHECK-NEXT: br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue ; CHECK: pred.store.if: ; CHECK-NEXT: [[INDUCTION4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION4]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION4]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION4]] ; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: br label %pred.store.continue ; CHECK: pred.store.continue: @@ -134,9 +134,9 @@ ; CHECK-NEXT: br i1 [[INDUCTION3]], label %pred.store.if4, label %pred.store.continue5 ; CHECK: pred.store.if4: ; CHECK-NEXT: [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION5]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]] ; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP2]], align 4 ; CHECK-NEXT: br label %pred.store.continue5 ; CHECK: pred.store.continue5: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -20,8 +20,6 @@ ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: CLONE ir<%tmp3> = load ir<%tmp2> ; CHECK-NEXT: CLONE store ir<0>, ir<%tmp2> -; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3> -; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4> ; CHECK-NEXT: Successor(s): if.then ; CHECK: if.then: @@ -33,6 +31,8 @@ ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK: pred.store.if: +; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3> +; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4> ; CHECK-NEXT: CLONE store ir<%tmp5>, ir<%tmp2> ; CHECK-NEXT: Successor(s): pred.store.continue