diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1680,6 +1680,15 @@ return IsSame(Scalars, ReuseShuffleIndices); } + /// \return Final vectorization factor for the node: the total number of + /// vectorized scalars. This is the size of \a ReuseShuffleIndices when the + /// entry reuses scalars (non-empty), otherwise the size of \a Scalars. + unsigned getVectorFactor() const { + if (!ReuseShuffleIndices.empty()) + return ReuseShuffleIndices.size(); + return Scalars.size(); + } + /// A vector of scalars. ValueList Scalars; @@ -4366,13 +4375,10 @@ if (MinBWs.count(VL[0])) VecTy = FixedVectorType::get( IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size()); - auto *FinalVecTy = VecTy; + unsigned EntryVF = E->getVectorFactor(); + auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF); - unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size(); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); - if (NeedToShuffleReuses) - FinalVecTy = - FixedVectorType::get(VecTy->getElementType(), ReuseShuffleNumbers); // FIXME: it tries to fix a problem with MSVC buildbots. TargetTransformInfo &TTIRef = *TTI; auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy, @@ -4643,7 +4649,7 @@ ++Idx; } } - Idx = ReuseShuffleNumbers; + Idx = EntryVF; for (Value *V : VL) { if (ShuffleOrOp == Instruction::ExtractElement) { auto *EE = cast(V); @@ -4765,7 +4771,7 @@ TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, TTI::getCastContextHint(VL0), CostKind, VL0); if (NeedToShuffleReuses) { - CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; + CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } // Calculate the cost of this instruction. 
@@ -4790,7 +4796,7 @@ TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(), CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0); if (NeedToShuffleReuses) { - CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; + CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; @@ -4895,7 +4901,7 @@ TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); if (NeedToShuffleReuses) { - CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; + CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; InstructionCost VecCost = @@ -4913,7 +4919,7 @@ InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost( Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK); if (NeedToShuffleReuses) { - CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; + CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; InstructionCost VecCost = TTI->getArithmeticInstrCost( @@ -4927,7 +4933,7 @@ InstructionCost ScalarEltCost = TTI->getMemoryOpCost( Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0); if (NeedToShuffleReuses) { - CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; + CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost; InstructionCost VecLdCost; @@ -4970,7 +4976,7 @@ InstructionCost ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind); if (NeedToShuffleReuses) { - CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; + CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost; @@ -5066,8 +5072,7 @@ (ForReduction && 
AreVectorizableGathers(VectorizableTree[0].get(), VectorizableTree[0]->Scalars.size()) && - (VectorizableTree[0]->Scalars.size() > 2 || - VectorizableTree[0]->ReuseShuffleIndices.size() > 2)))) + VectorizableTree[0]->getVectorFactor() > 2))) return true; if (VectorizableTree.size() != 2) @@ -5530,18 +5535,11 @@ } else { // Try to find nodes with the same vector factor. assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries."); - // FIXME: Shall be replaced by GetVF function once non-power-2 patch is - // landed. - auto &&GetVF = [](const TreeEntry *TE) { - if (!TE->ReuseShuffleIndices.empty()) - return TE->ReuseShuffleIndices.size(); - return TE->Scalars.size(); - }; DenseMap VFToTE; for (const TreeEntry *TE : UsedTEs.front()) - VFToTE.try_emplace(GetVF(TE), TE); + VFToTE.try_emplace(TE->getVectorFactor(), TE); for (const TreeEntry *TE : UsedTEs.back()) { - auto It = VFToTE.find(GetVF(TE)); + auto It = VFToTE.find(TE->getVectorFactor()); if (It != VFToTE.end()) { VF = It->first; Entries.push_back(It->second); @@ -5933,9 +5931,7 @@ } bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); - unsigned VF = E->Scalars.size(); - if (NeedToShuffleReuses) - VF = E->ReuseShuffleIndices.size(); + unsigned VF = E->getVectorFactor(); ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); if (E->State == TreeEntry::NeedToGather) { if (E->getMainOp())