diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2911,8 +2911,11 @@ } if (Last->State != TreeEntry::NeedToGather) { for (Value *V : VL) { - [[maybe_unused]] const TreeEntry *TE = getTreeEntry(V); - assert((!TE || TE == Last) && "Scalar already in tree!"); + const TreeEntry *TE = getTreeEntry(V); + assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) && + "Scalar already in tree!"); + if (TE) + continue; ScalarToTreeEntry[V] = Last; } // Update the scheduler bundle to point to this TreeEntry. @@ -5813,7 +5816,8 @@ // Check that none of the instructions in the bundle are already in the tree. for (Value *V : VL) { - if (!IsScatterVectorizeUserTE && !isa(V)) + if ((!IsScatterVectorizeUserTE && !isa(V)) || + doesNotNeedToBeScheduled(V)) continue; if (getTreeEntry(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V @@ -7560,6 +7564,12 @@ E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); SetVector UniqueValues(VL.begin(), VL.end()); const unsigned Sz = UniqueValues.size(); + SmallBitVector UsedScalars(Sz, false); + for (unsigned I = 0; I < Sz; ++I) { + if (getTreeEntry(UniqueValues[I]) == E) + continue; + UsedScalars.set(I); + } auto GetCostDiff = [=](function_ref ScalarEltCost, function_ref VectorCost) { @@ -7569,10 +7579,13 @@ // For some of the instructions no need to calculate cost for each // particular instruction, we can use the cost of the single // instruction x total number of scalar instructions. - ScalarCost = Sz * ScalarEltCost(0); + ScalarCost = (Sz - UsedScalars.count()) * ScalarEltCost(0); } else { - for (unsigned I = 0; I < Sz; ++I) + for (unsigned I = 0; I < Sz; ++I) { + if (UsedScalars.test(I)) + continue; ScalarCost += ScalarEltCost(I); + } } InstructionCost VecCost = VectorCost(CommonCost); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -6,23 +6,18 @@ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[SHUFFLE]], <4 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> , <4 x i64> poison) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <4 x ptr> [[TMP2]], <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> , <4 x i64> poison) ; CHECK-NEXT: br label [[JOIN:%.*]] ; CHECK: else: -; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2 -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> , <4 x i64> poison) +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, <4 x ptr> [[TMP6]], <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP7]], i32 8, <4 x i1> , <4 x i64> poison) ; CHECK-NEXT: br label [[JOIN]] ; CHECK: join: -; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP4]], [[IF]] ], [ [[TMP8]], [[ELSE]] ] ; CHECK-NEXT: ret void ; br i1 %c, label %if, label %else