Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -268,12 +268,6 @@ /// A builder used to construct the current plan. VPBuilder Builder; - /// The best number of elements of the vector types used in the - /// transformed loop. BestVF = None means that vectorization is - /// disabled. - Optional BestVF = None; - unsigned BestUF = 0; - public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, @@ -295,12 +289,13 @@ /// VF and its cost. VectorizationFactor planInVPlanNativePath(ElementCount UserVF); - /// Finalize the best decision and dispose of all other VPlans. - void setBestPlan(ElementCount VF, unsigned UF); + /// Return the best VPlan for the pair. + const VPlanPtr &getBestPlanFor(ElementCount VF, unsigned UF) const; /// Generate the IR code for the body of the vectorized loop according to the - /// best selected VPlan. - void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); + /// best selected VPlan \p BestPlan. + void executePlan(ElementCount VF, unsigned UF, const VPlanPtr &BestPlan, + InnerLoopVectorizer &LB, DominatorTree *DT); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printPlans(raw_ostream &O); Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8182,28 +8182,32 @@ return SelectedVF; } -void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) { +const VPlanPtr &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF, + unsigned UF) const { LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF << '\n'); - BestVF = VF; - BestUF = UF; - erase_if(VPlans, [VF](const VPlanPtr &Plan) { - return !Plan->hasVF(VF); - }); - assert(VPlans.size() == 1 && "Best VF has not a single VPlan."); + assert(count_if(VPlans, + [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == + 1 && + "Best VF has not a single VPlan."); + + for (const VPlanPtr &Plan : VPlans) { + if (Plan->hasVF(VF)) + return Plan; + } + llvm_unreachable("No plan found!"); } -void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV, +void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, + const VPlanPtr &BestVPlan, + InnerLoopVectorizer &ILV, DominatorTree *DT) { // Perform the actual loop transformation. // 1. Create a new empty loop. Unlink the old loop and connect the new one. - assert(BestVF.hasValue() && "Vectorization Factor is missing"); - assert(VPlans.size() == 1 && "Not a single VPlan to execute."); - - VPTransformState State{ - *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()}; + VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, + &ILV, BestVPlan.get()}; State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); State.TripCount = ILV.getOrCreateTripCount(nullptr); State.CanonicalIV = ILV.Induction; @@ -8219,7 +8223,7 @@ //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - VPlans.front()->execute(&State); + BestVPlan->execute(&State); // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. @@ -10049,7 +10053,7 @@ VectorizationFactor::Disabled() == VF) return false; - LVP.setBestPlan(VF.Width, 1); + const VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, 1); { GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, @@ -10058,7 +10062,7 @@ &CM, BFI, PSI, Checks); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LVP.executePlan(LB, DT); + LVP.executePlan(VF.Width, 1, BestPlan, LB, DT); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -10380,7 +10384,7 @@ F->getParent()->getDataLayout()); if (!VF.Width.isScalar() || IC > 1) Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); - LVP.setBestPlan(VF.Width, IC); + const VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, IC); using namespace ore; if (!VectorizeLoop) { @@ -10389,7 +10393,7 @@ // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(Unroller, DT); + LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -10412,8 +10416,9 @@ EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF); - LVP.executePlan(MainILV, DT); + const VPlanPtr &BestPlan = + LVP.getBestPlanFor(EPI.MainLoopVF, EPI.MainLoopUF); + LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestPlan, MainILV, DT); ++LoopsVectorized; simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); @@ -10421,13 +10426,15 @@ // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. - LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF); + const VPlanPtr &BestPlan2 = + LVP.getBestPlanFor(EPI.EpilogueVF, EPI.EpilogueUF); EPI.MainLoopVF = EPI.EpilogueVF; EPI.MainLoopUF = EPI.EpilogueUF; EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(EpilogILV, DT); + LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestPlan2, EpilogILV, + DT); ++LoopsEpilogueVectorized; if (!MainILV.areSafetyChecksAdded()) @@ -10435,7 +10442,7 @@ } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(LB, DT); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there