Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -295,12 +295,14 @@
   /// VF and its cost.
   VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
 
-  /// Finalize the best decision and dispose of all other VPlans.
-  void setBestPlan(ElementCount VF, unsigned UF);
+  /// Return the single VPlan that contains \p VF. Also records \p VF and
+  /// \p UF as the best factors, which executePlan reads afterwards.
+  const VPlanPtr &getBestPlanFor(ElementCount VF, unsigned UF);
 
   /// Generate the IR code for the body of the vectorized loop according to the
-  /// best selected VPlan.
-  void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);
+  /// best selected VPlan \p BestVPlan.
+  void executePlan(const VPlanPtr &BestVPlan, InnerLoopVectorizer &LB,
+                   DominatorTree *DT);
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void printPlans(raw_ostream &O);
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8182,28 +8182,35 @@
   return SelectedVF;
 }
 
-void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) {
+const VPlanPtr &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF,
+                                                         unsigned UF) {
   LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF
                     << '\n');
   BestVF = VF;
   BestUF = UF;
 
-  erase_if(VPlans, [VF](const VPlanPtr &Plan) {
-    return !Plan->hasVF(VF);
-  });
-  assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
+  assert(count_if(VPlans,
+                  [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
+             1 &&
+         "Best VF has not a single VPlan.");
+
+  for (const VPlanPtr &Plan : VPlans) {
+    if (Plan->hasVF(VF))
+      return Plan;
+  }
+  llvm_unreachable("No plan found!");
 }
 
-void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
+void LoopVectorizationPlanner::executePlan(const VPlanPtr &BestVPlan,
+                                           InnerLoopVectorizer &ILV,
                                            DominatorTree *DT) {
   // Perform the actual loop transformation.
 
   // 1. Create a new empty loop. Unlink the old loop and connect the new one.
   assert(BestVF.hasValue() && "Vectorization Factor is missing");
-  assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
 
-  VPTransformState State{
-      *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()};
+  VPTransformState State{*BestVF, BestUF, LI, DT, ILV.Builder,
+                         &ILV, BestVPlan.get()};
   State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
   State.TripCount = ILV.getOrCreateTripCount(nullptr);
   State.CanonicalIV = ILV.Induction;
@@ -8219,7 +8226,7 @@
   //===------------------------------------------------===//
 
   // 2. Copy and widen instructions from the old loop into the new loop.
-  VPlans.front()->execute(&State);
+  BestVPlan->execute(&State);
 
   // 3. Fix the vectorized code: take care of header phi's, live-outs,
   //    predication, updating analyses.
@@ -10049,7 +10056,7 @@
       VectorizationFactor::Disabled() == VF)
     return false;
 
-  LVP.setBestPlan(VF.Width, 1);
+  const VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, 1);
 
   {
     GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
@@ -10058,7 +10065,7 @@
                            &CM, BFI, PSI, Checks);
     LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
                       << L->getHeader()->getParent()->getName() << "\"\n");
-    LVP.executePlan(LB, DT);
+    LVP.executePlan(BestPlan, LB, DT);
   }
 
   // Mark the loop as already vectorized to avoid vectorizing again.
@@ -10380,7 +10387,7 @@
                              F->getParent()->getDataLayout());
     if (!VF.Width.isScalar() || IC > 1)
       Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
-    LVP.setBestPlan(VF.Width, IC);
+    const VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, IC);
 
     using namespace ore;
     if (!VectorizeLoop) {
@@ -10389,7 +10396,7 @@
       // interleave it.
       InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
                                  &CM, BFI, PSI, Checks);
-      LVP.executePlan(Unroller, DT);
+      LVP.executePlan(BestPlan, Unroller, DT);
 
       ORE->emit([&]() {
         return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10412,8 +10419,9 @@
         EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
                                            EPI, &LVL, &CM, BFI, PSI, Checks);
 
-        LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
-        LVP.executePlan(MainILV, DT);
+        const VPlanPtr &BestMainPlan =
+            LVP.getBestPlanFor(EPI.MainLoopVF, EPI.MainLoopUF);
+        LVP.executePlan(BestMainPlan, MainILV, DT);
         ++LoopsVectorized;
 
         simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
@@ -10421,13 +10429,14 @@
 
         // Second pass vectorizes the epilogue and adjusts the control flow
         // edges from the first pass.
-        LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
+        const VPlanPtr &BestEpiPlan =
+            LVP.getBestPlanFor(EPI.EpilogueVF, EPI.EpilogueUF);
         EPI.MainLoopVF = EPI.EpilogueVF;
         EPI.MainLoopUF = EPI.EpilogueUF;
         EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
                                                  ORE, EPI, &LVL, &CM, BFI, PSI,
                                                  Checks);
-        LVP.executePlan(EpilogILV, DT);
+        LVP.executePlan(BestEpiPlan, EpilogILV, DT);
         ++LoopsEpilogueVectorized;
 
         if (!MainILV.areSafetyChecksAdded())
@@ -10435,7 +10444,7 @@
       } else {
         InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
                                &LVL, &CM, BFI, PSI, Checks);
-        LVP.executePlan(LB, DT);
+        LVP.executePlan(BestPlan, LB, DT);
         ++LoopsVectorized;
 
         // Add metadata to disable runtime unrolling a scalar loop when there