diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2433,7 +2433,8 @@ /// \returns the cost of the vectorizable entry. InstructionCost getEntryCost(const TreeEntry *E, - ArrayRef VectorizedVals); + ArrayRef VectorizedVals, + SmallPtrSetImpl &CheckedExtracts); /// This is the recursive part of buildTree. void buildTree_rec(ArrayRef Roots, unsigned Depth, @@ -6731,6 +6732,7 @@ InstructionCost Cost = 0; ArrayRef VectorizedVals; BoUpSLP &R; + SmallPtrSetImpl &CheckedExtracts; constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; InstructionCost getBuildVectorCost(ArrayRef VL, Value *Root) { @@ -6923,8 +6925,10 @@ public: ShuffleCostEstimator(TargetTransformInfo &TTI, - ArrayRef VectorizedVals, BoUpSLP &R) - : TTI(TTI), VectorizedVals(VectorizedVals), R(R) {} + ArrayRef VectorizedVals, BoUpSLP &R, + SmallPtrSetImpl &CheckedExtracts) + : TTI(TTI), VectorizedVals(VectorizedVals), R(R), + CheckedExtracts(CheckedExtracts) {} Value *adjustExtracts(const TreeEntry *E, ArrayRef Mask, TTI::ShuffleKind ShuffleKind) { if (Mask.empty()) @@ -6939,7 +6943,6 @@ return nullptr; } DenseMap ExtractVectorsTys; - SmallPtrSet CheckedExtracts; for (auto [I, V] : enumerate(VL)) { // Ignore non-extractelement scalars. if (isa(V) || (!Mask.empty() && Mask[I] == UndefMaskElem)) @@ -7070,8 +7073,9 @@ } }; -InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, - ArrayRef VectorizedVals) { +InstructionCost +BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, + SmallPtrSetImpl &CheckedExtracts) { ArrayRef VL = E->Scalars; Type *ScalarTy = VL[0]->getType(); @@ -7098,7 +7102,8 @@ return 0; if (isa(VL[0])) return InstructionCost::getInvalid(); - ShuffleCostEstimator Estimator(*TTI, VectorizedVals, *this); + ShuffleCostEstimator Estimator(*TTI, VectorizedVals, *this, + CheckedExtracts); unsigned VF = E->getVectorFactor(); SmallVector ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), E->ReuseShuffleIndices.end()); @@ -8224,6 +8229,7 @@ unsigned BundleWidth = VectorizableTree[0]->Scalars.size(); + SmallPtrSet CheckedExtracts; for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) { TreeEntry &TE = *VectorizableTree[I]; if (TE.State == TreeEntry::NeedToGather) { @@ -8239,7 +8245,7 @@ } } - InstructionCost C = getEntryCost(&TE, VectorizedVals); + InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts); Cost += C; LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " << *TE.Scalars[0] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll @@ -8,7 +8,7 @@ ; YAML-NEXT: Function: g ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-2' +; YAML-NEXT: - Cost: '-1' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '4'