Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -348,7 +348,9 @@ /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. This method creates VPlans using VPRecipes. - void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF); + /// When the Maximum VF is set, the corresponding Minimum VF must also be set. + void buildVPlansWithVPRecipes(const MinMaxVFCandidates &MinVFs, + const MinMaxVFCandidates &MaxVFs); /// Adjust the recipes for any inloop reductions. The chain of instructions /// leading from the loop exit instr to the phi need to be converted to Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1224,7 +1224,8 @@ /// This method checks every power of two up to MaxVF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - VectorizationFactor selectVectorizationFactor(ElementCount MaxVF); + VectorizationFactor + selectVectorizationFactor(const MinMaxVFCandidates &MaxVF); VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, const LoopVectorizationPlanner &LVP); @@ -1488,7 +1489,7 @@ /// Returns true if the target machine supports all of the reduction /// variables found for the given VF. - bool canVectorizeReductions(ElementCount VF) { + bool canVectorizeReductions(ElementCount VF) const { return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool { RecurrenceDescriptor RdxDesc = Reduction.second; return TTI.isLegalToVectorizeReduction(RdxDesc, VF); @@ -2131,6 +2132,26 @@ collectSupportedLoops(*InnerL, LI, ORE, V); } +/// Generate power-of-2 VFs from Min up to and including Max. It will generate +/// fixed- and/or scalable VFs depending on which values are set. For scalable +/// VFs, it is only the minimum known value that is a power of 2, the eventual +/// VF may not be if the runtime value for vscale is not also a power of two. +void genFeasibleVFCandidates(const LoopVectorizationCostModel &CM, + SmallVectorImpl &List, + const MinMaxVFCandidates &Min, + const MinMaxVFCandidates &Max) { + auto GenerateRange = [&](const ElementCount &MinVF, + const ElementCount &MaxVF) { + for (auto VF = MinVF; ElementCount::isKnownLE(VF, MaxVF); VF *= 2) + List.push_back(VF); + }; + + if (Max.hasFixedVF()) + GenerateRange(*Min.getFixedVF(), *Max.getFixedVF()); + if (Max.hasScalableVF()) + GenerateRange(*Min.getScalableVF(), *Max.getScalableVF()); +} + namespace { /// The LoopVectorize Pass. @@ -5923,8 +5944,11 @@ return MaxVF; } -VectorizationFactor -LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) { +VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( + const MinMaxVFCandidates &MaxFactors) { + // Ignore the Scalable MaxVF in decision making process for now. + ElementCount MaxVF = *MaxFactors.getFixedVF(); + // FIXME: This can be fixed for scalable vectors later, because at this stage // the LoopVectorizer will only consider vectorizing a loop with scalable // vectors when the loop has a hint to enable vectorization for a given VF. @@ -7808,19 +7832,19 @@ CM.invalidateCostModelingDecisions(); } - ElementCount MaxVF = + ElementCount MaxUserVF = (UserVF && UserVF.isScalable() && MaxFactors.hasScalableVF()) ? *MaxFactors.getScalableVF() : *MaxFactors.getFixedVF(); - assert(MaxVF.isNonZero() && "MaxVF is zero."); + assert(MaxUserVF.isNonZero() && "MaxUserVF is zero."); - bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxVF); + bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF); if (!UserVF.isZero() && - (UserVFIsLegal || (UserVF.isScalable() && MaxVF.isScalable()))) { + (UserVFIsLegal || (UserVF.isScalable() && MaxUserVF.isScalable()))) { // FIXME: MaxVF is temporarily used inplace of UserVF for illegal scalable // VFs here, this should be reverted to only use legal UserVFs once the // loop below supports scalable VFs. - ElementCount VF = UserVFIsLegal ? UserVF : MaxVF; + ElementCount VF = UserVFIsLegal ? UserVF : MaxUserVF; LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max") << " VF " << VF << ".\n"); assert(isPowerOf2_32(VF.getKnownMinValue()) && @@ -7829,16 +7853,17 @@ // profitable to scalarize. CM.selectUserVectorizationFactor(VF); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(VF, VF); + buildVPlansWithVPRecipes({VF}, {VF}); LLVM_DEBUG(printPlans(dbgs())); return {{VF, 0}}; } - assert(!MaxVF.isScalable() && - "Scalable vectors not yet supported beyond this point"); + MinMaxVFCandidates MinFactors(ElementCount::getFixed(1), + ElementCount::getScalable(1)); - for (ElementCount VF = ElementCount::getFixed(1); - ElementCount::isKnownLE(VF, MaxVF); VF *= 2) { + SmallVector VFCandidates; + genFeasibleVFCandidates(CM, VFCandidates, MinFactors, MaxFactors); + for (auto &VF : VFCandidates) { // Collect Uniform and Scalar instructions after vectorization with VF. CM.collectUniformsAndScalars(VF); @@ -7850,13 +7875,15 @@ CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF); + buildVPlansWithVPRecipes(MinFactors, MaxFactors); + LLVM_DEBUG(printPlans(dbgs())); - if (MaxVF.isScalar()) - return VectorizationFactor::Disabled(); + if (Optional FixedMaxVF = MaxFactors.getFixedVF()) + if (FixedMaxVF->isScalar() && !MaxFactors.hasScalableVF()) + return VectorizationFactor::Disabled(); // Select the optimal vectorization factor. - return CM.selectVectorizationFactor(MaxVF); + return CM.selectVectorizationFactor(MaxFactors); } void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) { @@ -8755,8 +8782,8 @@ return toVPRecipeResult(tryToWiden(Instr, *Plan)); } -void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, - ElementCount MaxVF) { +void LoopVectorizationPlanner::buildVPlansWithVPRecipes( + const MinMaxVFCandidates &MinVFs, const MinMaxVFCandidates &MaxVFs) { assert(OrigLoop->isInnermost() && "Inner loop expected."); // Collect instructions from the original loop that will become trivially dead @@ -8780,12 +8807,24 @@ for (Instruction *I : DeadInstructions) SinkAfter.erase(I); - auto MaxVFPlusOne = MaxVF.getWithIncrement(1); - for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { - VFRange SubRange = {VF, MaxVFPlusOne}; - VPlans.push_back( - buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); - VF = SubRange.End; + auto CollectVPlans = [&](const ElementCount &MinVF, const ElementCount &MaxVF) { + auto MaxVFPlusOne = MaxVF.getWithIncrement(1); + for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { + VFRange SubRange = {VF, MaxVFPlusOne}; + VPlans.push_back( + buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); + VF = SubRange.End; + } + }; + + if (MaxVFs.hasFixedVF()) { + assert(MinVFs.hasFixedVF() && "Expected MinVFs.FixedVF to be set"); + CollectVPlans(*MinVFs.getFixedVF(), *MaxVFs.getFixedVF()); + } + + if (MaxVFs.hasScalableVF()) { + assert(MinVFs.hasScalableVF() && "Expected MinVFs.ScalableVF to be set"); + CollectVPlans(*MinVFs.getScalableVF(), *MaxVFs.getScalableVF()); } }