Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -268,12 +268,6 @@ /// A builder used to construct the current plan. VPBuilder Builder; - /// The best number of elements of the vector types used in the - /// transformed loop. BestVF = None means that vectorization is - /// disabled. - Optional<ElementCount> BestVF = None; - unsigned BestUF = 0; - public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, @@ -295,12 +289,13 @@ /// VF and its cost. VectorizationFactor planInVPlanNativePath(ElementCount UserVF); - /// Finalize the best decision and dispose of all other VPlans. - void setBestPlan(ElementCount VF, unsigned UF); + /// Return the best VPlan for \p VF. + const VPlanPtr &getBestPlanFor(ElementCount VF) const; /// Generate the IR code for the body of the vectorized loop according to the - /// best selected VPlan. - void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); + /// best selected \p VF, \p UF and VPlan \p BestPlan. + void executePlan(ElementCount VF, unsigned UF, const VPlanPtr &BestPlan, + InnerLoopVectorizer &LB, DominatorTree *DT); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printPlans(raw_ostream &O); @@ -316,6 +311,14 @@ }); } + /// Remove all VPlans except those containing one of the \p VFs. + void removePlansExcept(const ArrayRef<ElementCount> VFs) { + erase_if(VPlans, [VFs](const VPlanPtr &Plan) { + return all_of( + VFs, [&Plan](const ElementCount &VF) { return !Plan->hasVF(VF); }); + }); + } + /// Test a \p Predicate on a \p Range of VF's. Return the value of applying /// \p Predicate on Range.Start, possibly decreasing Range.End such that the /// returned value holds for the entire \p Range. 
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8182,28 +8182,31 @@ return SelectedVF; } -void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) { - LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF - << '\n'); - BestVF = VF; - BestUF = UF; - - erase_if(VPlans, [VF](const VPlanPtr &Plan) { - return !Plan->hasVF(VF); - }); - assert(VPlans.size() == 1 && "Best VF has not a single VPlan."); +const VPlanPtr & +LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { + LLVM_DEBUG(dbgs() << "Getting best plan for VF=" << VF << '\n'); + + assert(count_if(VPlans, + [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == + 1 && + "Best VF has not a single VPlan."); + + for (const VPlanPtr &Plan : VPlans) { + if (Plan->hasVF(VF)) + return Plan; + } + llvm_unreachable("No plan found!"); } -void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV, +void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, + const VPlanPtr &BestVPlan, + InnerLoopVectorizer &ILV, DominatorTree *DT) { // Perform the actual loop transformation. // 1. Create a new empty loop. Unlink the old loop and connect the new one. - assert(BestVF.hasValue() && "Vectorization Factor is missing"); - assert(VPlans.size() == 1 && "Not a single VPlan to execute."); - - VPTransformState State{ - *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()}; + VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, + &ILV, BestVPlan.get()}; State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); State.TripCount = ILV.getOrCreateTripCount(nullptr); State.CanonicalIV = ILV.Induction; @@ -8219,7 +8222,7 @@ //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. 
- VPlans.front()->execute(&State); + BestVPlan->execute(&State); // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. @@ -10049,7 +10052,8 @@ VectorizationFactor::Disabled() == VF) return false; - LVP.setBestPlan(VF.Width, 1); + LVP.removePlansExcept({VF.Width}); + const VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width); { GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, @@ -10058,7 +10062,7 @@ &CM, BFI, PSI, Checks); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LVP.executePlan(LB, DT); + LVP.executePlan(VF.Width, 1, BestPlan, LB, DT); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -10380,7 +10384,8 @@ F->getParent()->getDataLayout()); if (!VF.Width.isScalar() || IC > 1) Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); - LVP.setBestPlan(VF.Width, IC); + LVP.removePlansExcept({VF.Width}); + const VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width); using namespace ore; if (!VectorizeLoop) { @@ -10389,7 +10394,7 @@ // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(Unroller, DT); + LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -10412,8 +10417,8 @@ EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF); - LVP.executePlan(MainILV, DT); + LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestPlan, MainILV, + DT); ++LoopsVectorized; simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); @@ -10421,13 +10426,13 @@ // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. 
- LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF); EPI.MainLoopVF = EPI.EpilogueVF; EPI.MainLoopUF = EPI.EpilogueUF; EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(EpilogILV, DT); + LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestPlan, EpilogILV, + DT); ++LoopsEpilogueVectorized; if (!MainILV.areSafetyChecksAdded()) @@ -10435,7 +10440,7 @@ } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(LB, DT); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there Index: llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll +++ llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll @@ -8,9 +8,9 @@ define i32 @foo() { ; CHECK-LABEL: foo -; CHECK-PWR8: Setting best plan to VF=16, UF=4 +; CHECK-PWR8: Getting best plan for VF=16 -; CHECK-PWR9: Setting best plan to VF=8, UF=8 +; CHECK-PWR9: Getting best plan for VF=8 entry: @@ -46,7 +46,7 @@ ; CHECK-LABEL: goo -; CHECK: Setting best plan to VF=16, UF=4 +; CHECK: Getting best plan for VF=16 entry: br label %for.body @@ -79,7 +79,7 @@ define i64 @bar(i64* nocapture %a) { ; CHECK-LABEL: bar -; CHECK: Setting best plan to VF=2, UF=12 +; CHECK: Getting best plan for VF=2 entry: br label %for.body @@ -107,7 +107,7 @@ define void @hoo(i32 %n) { ; CHECK-LABEL: hoo -; CHECK: Setting best plan to VF=1, UF=12 +; CHECK: Getting best plan for VF=1 entry: br label %for.body