diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -267,7 +267,7 @@ LoopVectorizationLegality *Legal; /// The profitability analysis. - LoopVectorizationCostModel &CM; + SmallVector CMs; /// The interleaved access analysis. InterleavedAccessInfo &IAI; @@ -287,16 +287,18 @@ LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, - LoopVectorizationCostModel &CM, + ArrayRef CMs, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE) - : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI), - PSE(PSE), Hints(Hints), ORE(ORE) {} + : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CMs(CMs), + IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {} /// Plan how to best vectorize, return the best VF and its cost, or /// std::nullopt if vectorization and interleaving should be avoided up front. + std::optional plan(ElementCount UserVF, unsigned UserIC, + LoopVectorizationCostModel &CM); std::optional plan(ElementCount UserVF, unsigned UserIC); /// Use the VPlan-native path to plan how to best vectorize, return the best @@ -340,24 +342,27 @@ /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. - void buildVPlans(ElementCount MinVF, ElementCount MaxVF); + void buildVPlans(ElementCount MinVF, ElementCount MaxVF, + LoopVectorizationCostModel &CM); private: /// Build a VPlan according to the information gathered by Legal. \return a /// VPlan for vectorization factors \p Range.Start and up to \p Range.End /// exclusive, possibly decreasing \p Range.End. 
- VPlanPtr buildVPlan(VFRange &Range); + VPlanPtr buildVPlan(VFRange &Range, LoopVectorizationCostModel &CM); /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. VPlanPtr buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &DeadInstructions, - const MapVector &SinkAfter); + const MapVector &SinkAfter, + LoopVectorizationCostModel &CM); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. This method creates VPlans using VPRecipes. - void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF); + void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF, + LoopVectorizationCostModel &CM); // Adjust the recipes for reductions. For in-loop reductions the chain of // instructions leading from the loop exit instr to the phi need to be @@ -366,7 +371,8 @@ // between the phi and live-out recipes when folding the tail. void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, - ElementCount MinVF); + ElementCount MinVF, + LoopVectorizationCostModel &CM); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7464,7 +7464,7 @@ VF = ElementCount::getFixed(determineVPlanVF( TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) .getFixedValue(), - CM)); + *CMs[0])); LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n"); // Make sure we have a VF > 1 for stress testing. @@ -7479,7 +7479,7 @@ "VF needs to be a power of two"); LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? 
"user " : "") << "VF " << VF << " to build VPlans.\n"); - buildVPlans(VF, VF); + buildVPlans(VF, VF, *CMs[0]); // For VPlan build stress testing, we bail out after VPlan construction. if (VPlanBuildStressTest) @@ -7496,6 +7496,29 @@ std::optional LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { + std::optional Best; + SmallVector BestPlans; + + // Plan with every cost model and keep the plans of the most profitable one. + for (LoopVectorizationCostModel *CM : CMs) { + auto Current = plan(UserVF, UserIC, *CM); + if (Current && (!Best || CM->isMoreProfitable(*Current, *Best))) { + Best = Current; + BestPlans = std::move(VPlans); + } + // Always reset VPlans, even when this cost model lost: stale plans would + // otherwise be mixed into the plans built for the next cost model. + VPlans.clear(); + } + + // Hand the winning plans (none if every cost model bailed out) to the planner. + VPlans = std::move(BestPlans); + return Best; +} + +std::optional +LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC, + LoopVectorizationCostModel &CM) { assert(OrigLoop->isInnermost() && "Inner loop expected."); CM.collectValuesToIgnore(); CM.collectElementTypesForWidening(); @@ -7529,7 +7552,7 @@ if (CM.selectUserVectorizationFactor(UserVF)) { LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(UserVF, UserVF); + buildVPlansWithVPRecipes(UserVF, UserVF, CM); LLVM_DEBUG(printPlans(dbgs())); return {{UserVF, 0, 0}}; } else @@ -7552,13 +7575,15 @@ // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. - if (VF.isVector()) + if (VF.isVector()) { CM.collectInstsToScalarize(VF); + } } CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF); - buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF); + buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF, CM); + buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF, + CM); LLVM_DEBUG(printPlans(dbgs())); if (!MaxFactors.hasVector()) @@ -8027,11 +8052,12 @@ /// vectorization decision can potentially shorten this sub-range during /// buildVPlan(). 
void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF, - ElementCount MaxVF) { + ElementCount MaxVF, + LoopVectorizationCostModel &CM) { auto MaxVFPlusOne = MaxVF.getWithIncrement(1); for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { VFRange SubRange = {VF, MaxVFPlusOne}; - VPlans.push_back(buildVPlan(SubRange)); + VPlans.push_back(buildVPlan(SubRange, CM)); VF = SubRange.End; } } @@ -8668,8 +8694,8 @@ return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan)); } -void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, - ElementCount MaxVF) { +void LoopVectorizationPlanner::buildVPlansWithVPRecipes( + ElementCount MinVF, ElementCount MaxVF, LoopVectorizationCostModel &CM) { assert(OrigLoop->isInnermost() && "Inner loop expected."); // Add assume instructions we need to drop to DeadInstructions, to prevent @@ -8707,7 +8733,7 @@ for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { VFRange SubRange = {VF, MaxVFPlusOne}; VPlans.push_back( - buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); + buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter, CM)); VF = SubRange.End; } } @@ -8814,7 +8840,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &DeadInstructions, - const MapVector &SinkAfter) { + const MapVector &SinkAfter, + LoopVectorizationCostModel &CM) { SmallPtrSet *, 1> InterleaveGroups; @@ -8854,7 +8881,7 @@ // placeholders for its members' Recipes which we'll be replacing with a // single VPInterleaveRecipe. for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { - auto applyIG = [IG, this](ElementCount VF) -> bool { + auto applyIG = [IG, &CM](ElementCount VF) -> bool { return (VF.isVector() && // Query is illegal for VF == 1 CM.getWideningDecision(IG->getInsertPos(), VF) == LoopVectorizationCostModel::CM_Interleave); @@ -9072,7 +9099,7 @@ // Adjust the recipes for any inloop reductions. 
adjustRecipesForReductions(cast(TopRegion->getExiting()), Plan, - RecipeBuilder, Range.Start); + RecipeBuilder, Range.Start, CM); // Introduce a recipe to combine the incoming and previous values of a // fixed-order recurrence. @@ -9169,7 +9196,8 @@ return Plan; } -VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { +VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range, + LoopVectorizationCostModel &CM) { // Outer loop handling: They may require CFG and instruction level // transformations before even evaluating whether vectorization is profitable. // Since we cannot modify the incoming IR, we need to build VPlan upfront in @@ -9212,7 +9240,7 @@ // and live-out recipes when folding the tail. void LoopVectorizationPlanner::adjustRecipesForReductions( VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, - ElementCount MinVF) { + ElementCount MinVF, LoopVectorizationCostModel &CM) { for (const auto &Reduction : CM.getInLoopReductionChains()) { PHINode *Phi = Reduction.first; const RecurrenceDescriptor &RdxDesc = @@ -9973,7 +10001,8 @@ // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, ArrayRef(&CM), IAI, PSE, + Hints, ORE); // Get user vectorization factor. ElementCount UserVF = Hints.getWidth(); @@ -10313,7 +10342,8 @@ LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); // Use the planner for vectorization. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, {&CM}, IAI, PSE, Hints, + ORE); // Get user vectorization factor and interleave count. ElementCount UserVF = Hints.getWidth();