Summary (free text ahead of the first "Index:" header; patch tools skip it):
  * buildVPlansWithVPRecipes and selectVectorizationFactor now take
    OptionalVFCandidates instead of a single ElementCount.
  * A new static helper, genAllPowerOfTwoFactorsUptoMaxFactors, lists the
    power-of-2 VFs for whichever of the fixed/scalable maxima are set; the
    planner loop iterates over that list instead of fixed VFs only.
  * buildVPlansWithVPRecipes builds VPlans separately for the fixed and the
    scalable range and asserts that a minimum exists for each maximum set.
  * selectVectorizationFactor still reads only the fixed maximum VF.
  NOTE(review): the element type of SmallVectorImpl/SmallVector/Optional was
  missing in the text this patch was recovered from; ElementCount is restored
  from usage. The SmallVector inline capacity (8) is a placeholder - confirm.
  NOTE(review): leading whitespace was reconstructed per clang-format/LLVM
  style; if a hunk is rejected, retry with whitespace-insensitive matching.

Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -357,7 +357,9 @@
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
   /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
-  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
+  /// When the Maximum VF is set, the corresponding Minimum VF must also be set.
+  void buildVPlansWithVPRecipes(const OptionalVFCandidates &MinVFs,
+                                const OptionalVFCandidates &MaxVFs);
 
   /// Adjust the recipes for any inloop reductions. The chain of instructions
   /// leading from the loop exit instr to the phi need to be converted to
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1265,7 +1265,8 @@
   /// This method checks every power of two up to MaxVF. If UserVF is not ZERO
   /// then this vectorization factor will be selected if vectorization is
   /// possible.
-  VectorizationFactor selectVectorizationFactor(ElementCount MaxVF);
+  VectorizationFactor
+  selectVectorizationFactor(const OptionalVFCandidates &MaxVF);
   VectorizationFactor
   selectEpilogueVectorizationFactor(const ElementCount MaxVF,
                                     const LoopVectorizationPlanner &LVP);
@@ -1959,6 +1960,26 @@
     collectSupportedLoops(*InnerL, LI, ORE, V);
 }
 
+/// Generate all power-of-2 VFs from MinFactors up to
+/// and including MaxFactors. It will generate fixed- and/or scalable VFs
+/// depending on which values are set.
+static void
+genAllPowerOfTwoFactorsUptoMaxFactors(SmallVectorImpl<ElementCount> &List,
+                                      const OptionalVFCandidates &MinFactors,
+                                      const OptionalVFCandidates &MaxFactors) {
+  if (MaxFactors.hasFixedVF())
+    for (ElementCount VF = *MinFactors.getFixedVF(),
+                      Max = *MaxFactors.getFixedVF();
+         ElementCount::isKnownLE(VF, Max); VF *= 2)
+      List.push_back(VF);
+
+  if (MaxFactors.hasScalableVF())
+    for (ElementCount VF = *MinFactors.getScalableVF(),
+                      Max = *MaxFactors.getScalableVF();
+         ElementCount::isKnownLE(VF, Max); VF *= 2)
+      List.push_back(VF);
+}
+
 namespace {
 
 /// The LoopVectorize Pass.
@@ -5890,8 +5911,11 @@
   return MaxVF;
 }
 
-VectorizationFactor
-LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) {
+VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
+    const OptionalVFCandidates &MaxFactors) {
+  // Ignore the Scalable MaxVF in decision making process for now.
+  ElementCount MaxVF = *MaxFactors.getFixedVF();
+
   // FIXME: This can be fixed for scalable vectors later, because at this stage
   // the LoopVectorizer will only consider vectorizing a loop with scalable
   // vectors when the loop has a hint to enable vectorization for a given VF.
@@ -7771,18 +7795,18 @@
       CM.invalidateCostModelingDecisions();
   }
 
-  ElementCount MaxVF = (UserVF.isScalable() && MaxFactors.hasScalableVF())
-                           ? *MaxFactors.getScalableVF()
-                           : *MaxFactors.getFixedVF();
-  assert(MaxVF.isNonZero() && "MaxVF is zero.");
+  ElementCount MaxUserVF = (UserVF.isScalable() && MaxFactors.hasScalableVF())
+                               ? *MaxFactors.getScalableVF()
+                               : *MaxFactors.getFixedVF();
+  assert(MaxUserVF.isNonZero() && "MaxUserVF is zero.");
 
-  bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxVF);
+  bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF);
   if (!UserVF.isZero() &&
-      (UserVFIsLegal || (UserVF.isScalable() && MaxVF.isScalable()))) {
-    // FIXME: MaxVF is temporarily used inplace of UserVF for illegal
+      (UserVFIsLegal || (UserVF.isScalable() && MaxUserVF.isScalable()))) {
+    // FIXME: MaxUserVF is temporarily used inplace of UserVF for illegal
     // scalable VFs here, this should be reverted to only use legal UserVFs once
     // the loop below supports scalable VFs.
-    ElementCount VF = UserVFIsLegal ? UserVF : MaxVF;
+    ElementCount VF = UserVFIsLegal ? UserVF : MaxUserVF;
     LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max")
                       << " VF " << VF << ".\n");
     assert(isPowerOf2_32(VF.getKnownMinValue()) &&
@@ -7791,16 +7815,17 @@
     // profitable to scalarize.
     CM.selectUserVectorizationFactor(VF);
     CM.collectInLoopReductions();
-    buildVPlansWithVPRecipes(VF, VF);
+    buildVPlansWithVPRecipes({VF}, {VF});
     LLVM_DEBUG(printPlans(dbgs()));
     return {{VF, 0}};
   }
 
-  assert(!MaxVF.isScalable() &&
-         "Scalable vectors not yet supported beyond this point");
+  OptionalVFCandidates MinFactors(ElementCount::getFixed(1),
+                                  ElementCount::getScalable(1));
 
-  for (ElementCount VF = ElementCount::getFixed(1);
-       ElementCount::isKnownLE(VF, MaxVF); VF *= 2) {
+  SmallVector<ElementCount, 8> PowerOf2VFs;
+  genAllPowerOfTwoFactorsUptoMaxFactors(PowerOf2VFs, MinFactors, MaxFactors);
+  for (auto &VF : PowerOf2VFs) {
     // Collect Uniform and Scalar instructions after vectorization with VF.
     CM.collectUniformsAndScalars(VF);
 
@@ -7812,13 +7837,19 @@
 
   CM.collectInLoopReductions();
 
-  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF);
+  buildVPlansWithVPRecipes(MinFactors, MaxFactors);
+
   LLVM_DEBUG(printPlans(dbgs()));
-  if (MaxVF.isScalar())
-    return VectorizationFactor::Disabled();
+  if (Optional<ElementCount> FixedMaxVF = MaxFactors.getFixedVF())
+    if (FixedMaxVF->isScalar()) {
+      assert((!MaxFactors.hasScalableVF() ||
+              MaxFactors.getScalableVF()->isScalar()) &&
+             "Unexpected max scalable VF");
+      return VectorizationFactor::Disabled();
+    }
 
   // Select the optimal vectorization factor.
-  return CM.selectVectorizationFactor(MaxVF);
+  return CM.selectVectorizationFactor(MaxFactors);
 }
 
 void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) {
@@ -8712,8 +8743,9 @@
   return tryToWiden(Instr, *Plan);
 }
 
-void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
-                                                        ElementCount MaxVF) {
+void LoopVectorizationPlanner::buildVPlansWithVPRecipes(
+    const OptionalVFCandidates &MinVFs,
+    const OptionalVFCandidates &MaxVFs) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
 
   // Collect instructions from the original loop that will become trivially dead
@@ -8737,12 +8769,24 @@
   for (Instruction *I : DeadInstructions)
     SinkAfter.erase(I);
 
-  auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
-  for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
-    VFRange SubRange = {VF, MaxVFPlusOne};
-    VPlans.push_back(
-        buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
-    VF = SubRange.End;
+  auto CollectVPlans = [&](const ElementCount &MinVF, const ElementCount &MaxVF) {
+    auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
+    for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
+      VFRange SubRange = {VF, MaxVFPlusOne};
+      VPlans.push_back(
+          buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
+      VF = SubRange.End;
+    }
+  };
+
+  if (MaxVFs.hasFixedVF()) {
+    assert(MinVFs.hasFixedVF() && "Expected MinVFs.FixedVF to be set");
+    CollectVPlans(*MinVFs.getFixedVF(), *MaxVFs.getFixedVF());
+  }
+
+  if (MaxVFs.hasScalableVF()) {
+    assert(MinVFs.hasScalableVF() && "Expected MinVFs.ScalableVF to be set");
+    CollectVPlans(*MinVFs.getScalableVF(), *MaxVFs.getScalableVF());
   }
 }
 