Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -360,7 +360,9 @@ /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. This method creates VPlans using VPRecipes. - void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF); + /// When the Maximum VF is set, the corresponding Minimum VF must also be set. + void buildVPlansWithVPRecipes(const MinMaxVFCandidates &MinVFs, + const MinMaxVFCandidates &MaxVFs); /// Adjust the recipes for any inloop reductions. The chain of instructions /// leading from the loop exit instr to the phi need to be converted to Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1262,7 +1262,8 @@ /// This method checks every power of two up to MaxVF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - VectorizationFactor selectVectorizationFactor(ElementCount MaxVF); + VectorizationFactor + selectVectorizationFactor(const MinMaxVFCandidates &MaxVF); VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, const LoopVectorizationPlanner &LVP); @@ -1526,7 +1527,7 @@ /// Returns true if the target machine supports all of the reduction /// variables found for the given VF. - bool canVectorizeReductions(ElementCount VF) { + bool canVectorizeReductions(ElementCount VF) const { return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool { RecurrenceDescriptor RdxDesc = Reduction.second; return TTI.isLegalToVectorizeReduction(RdxDesc, VF); @@ -1964,6 +1965,35 @@ collectSupportedLoops(*InnerL, LI, ORE, V); } +/// Generate power-of-2 VFs from MinFactors up to and including MaxFactors. It +/// will generate fixed- and/or scalable VFs depending on which values are set. +/// For scalable VFs, it is only the minimum known value that is a power of 2, +/// the eventual VF may not be if the runtime value for vscale is not also a +/// power of two. +void genFeasibleVFCandidates(const LoopVectorizationCostModel &CM, + SmallVectorImpl &List, + const MinMaxVFCandidates &MinFactors, + const MinMaxVFCandidates &MaxFactors) { + auto AddToListIfVectorizable = [&](const ElementCount &VF) { + if (!CM.canVectorizeReductions(VF)) + return; + + List.push_back(VF); + }; + + if (MaxFactors.hasFixedVF()) + for (ElementCount VF = *MinFactors.getFixedVF(), + Max = *MaxFactors.getFixedVF(); + ElementCount::isKnownLE(VF, Max); VF *= 2) + AddToListIfVectorizable(VF); + + if (MaxFactors.hasScalableVF()) + for (ElementCount VF = *MinFactors.getScalableVF(), + Max = *MaxFactors.getScalableVF(); + ElementCount::isKnownLE(VF, Max); VF *= 2) + AddToListIfVectorizable(VF); +} + namespace { /// The LoopVectorize Pass. @@ -5905,8 +5935,11 @@ return MaxVF; } -VectorizationFactor -LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) { +VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( + const MinMaxVFCandidates &MaxFactors) { + // Ignore the Scalable MaxVF in decision making process for now. + ElementCount MaxVF = *MaxFactors.getFixedVF(); + // FIXME: This can be fixed for scalable vectors later, because at this stage // the LoopVectorizer will only consider vectorizing a loop with scalable // vectors when the loop has a hint to enable vectorization for a given VF. @@ -7787,19 +7820,19 @@ CM.invalidateCostModelingDecisions(); } - ElementCount MaxVF = + ElementCount MaxUserVF = (UserVF && UserVF.isScalable() && MaxFactors.hasScalableVF()) ? *MaxFactors.getScalableVF() : *MaxFactors.getFixedVF(); - assert(MaxVF.isNonZero() && "MaxVF is zero."); + assert(MaxUserVF.isNonZero() && "MaxUserVF is zero."); - bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxVF); + bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF); if (!UserVF.isZero() && - (UserVFIsLegal || (UserVF.isScalable() && MaxVF.isScalable()))) { - // FIXME: MaxVF is temporarily used inplace of UserVF for illegal + (UserVFIsLegal || (UserVF.isScalable() && MaxUserVF.isScalable()))) { + // FIXME: MaxUserVF is temporarily used inplace of UserVF for illegal // scalable VFs here, this should be reverted to only use legal UserVFs once // the loop below supports scalable VFs. - ElementCount VF = UserVFIsLegal ? UserVF : MaxVF; + ElementCount VF = UserVFIsLegal ? UserVF : MaxUserVF; LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max") << " VF " << VF << ".\n"); assert(isPowerOf2_32(VF.getKnownMinValue()) && @@ -7808,16 +7841,17 @@ // profitable to scalarize. CM.selectUserVectorizationFactor(VF); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(VF, VF); + buildVPlansWithVPRecipes({VF}, {VF}); LLVM_DEBUG(printPlans(dbgs())); return {{VF, 0}}; } - assert(!MaxVF.isScalable() && - "Scalable vectors not yet supported beyond this point"); + MinMaxVFCandidates MinFactors(ElementCount::getFixed(1), + ElementCount::getScalable(1)); - for (ElementCount VF = ElementCount::getFixed(1); - ElementCount::isKnownLE(VF, MaxVF); VF *= 2) { + SmallVector VFCandidates; + genFeasibleVFCandidates(CM, VFCandidates, MinFactors, MaxFactors); + for (auto &VF : VFCandidates) { // Collect Uniform and Scalar instructions after vectorization with VF. CM.collectUniformsAndScalars(VF); @@ -7829,13 +7863,19 @@ CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF); + buildVPlansWithVPRecipes(MinFactors, MaxFactors); + LLVM_DEBUG(printPlans(dbgs())); - if (MaxVF.isScalar()) - return VectorizationFactor::Disabled(); + if (Optional FixedMaxVF = MaxFactors.getFixedVF()) + if (FixedMaxVF->isScalar()) { + assert((!MaxFactors.hasScalableVF() || + MaxFactors.getScalableVF()->isScalar()) && + "Unexpected max scalable VF"); + return VectorizationFactor::Disabled(); + } // Select the optimal vectorization factor. - return CM.selectVectorizationFactor(MaxVF); + return CM.selectVectorizationFactor(MaxFactors); } void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) { @@ -8740,8 +8780,8 @@ return tryToWiden(Instr, *Plan); } -void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, - ElementCount MaxVF) { +void LoopVectorizationPlanner::buildVPlansWithVPRecipes( + const MinMaxVFCandidates &MinVFs, const MinMaxVFCandidates &MaxVFs) { assert(OrigLoop->isInnermost() && "Inner loop expected."); // Collect instructions from the original loop that will become trivially dead @@ -8765,12 +8805,24 @@ for (Instruction *I : DeadInstructions) SinkAfter.erase(I); - auto MaxVFPlusOne = MaxVF.getWithIncrement(1); - for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { - VFRange SubRange = {VF, MaxVFPlusOne}; - VPlans.push_back( - buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); - VF = SubRange.End; + auto CollectVPlans = [&](const ElementCount &MinVF, const ElementCount &MaxVF) { + auto MaxVFPlusOne = MaxVF.getWithIncrement(1); + for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { + VFRange SubRange = {VF, MaxVFPlusOne}; + VPlans.push_back( + buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); + VF = SubRange.End; + } + }; + + if (MaxVFs.hasFixedVF()) { + assert(MinVFs.hasFixedVF() && "Expected MinVFs.FixedVF to be set"); + CollectVPlans(*MinVFs.getFixedVF(), *MaxVFs.getFixedVF()); + } + + if (MaxVFs.hasScalableVF()) { + assert(MinVFs.hasScalableVF() && "Expected MinVFs.ScalableVF to be set"); + CollectVPlans(*MinVFs.getScalableVF(), *MaxVFs.getScalableVF()); } }