diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -268,12 +268,6 @@
   /// A builder used to construct the current plan.
   VPBuilder Builder;
 
-  /// The best number of elements of the vector types used in the
-  /// transformed loop. BestVF = None means that vectorization is
-  /// disabled.
-  Optional<ElementCount> BestVF = None;
-  unsigned BestUF = 0;
-
 public:
   LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
                            const TargetTransformInfo *TTI,
@@ -295,12 +289,14 @@
   /// VF and its cost.
   VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
 
-  /// Finalize the best decision and dispose of all other VPlans.
-  void setBestPlan(ElementCount VF, unsigned UF);
+  /// Return the best VPlan for \p VF. Exactly one of the planned VPlans must
+  /// contain \p VF; this is checked by an assertion.
+  VPlan &getBestPlanFor(ElementCount VF) const;
 
   /// Generate the IR code for the body of the vectorized loop according to the
-  /// best selected VPlan.
-  void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);
+  /// best selected \p BestVF, \p BestUF and VPlan \p BestVPlan.
+  void executePlan(ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
+                   InnerLoopVectorizer &LB, DominatorTree *DT);
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void printPlans(raw_ostream &O);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8189,28 +8189,30 @@
   return SelectedVF;
 }
 
-void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) {
-  LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF
-                    << '\n');
-  BestVF = VF;
-  BestUF = UF;
+VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
+  assert(count_if(VPlans,
+                  [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
+             1 &&
+         "Best VF has not a single VPlan.");
 
-  erase_if(VPlans, [VF](const VPlanPtr &Plan) {
-    return !Plan->hasVF(VF);
-  });
-  assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
+  for (const VPlanPtr &Plan : VPlans) {
+    if (Plan->hasVF(VF))
+      return *Plan;
+  }
+  llvm_unreachable("No plan found!");
 }
 
-void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
+void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
+                                           VPlan &BestVPlan,
+                                           InnerLoopVectorizer &ILV,
                                            DominatorTree *DT) {
+  LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
+                    << ", UF=" << BestUF << '\n');
+
   // Perform the actual loop transformation.
 
   // 1. Create a new empty loop. Unlink the old loop and connect the new one.
-  assert(BestVF.hasValue() && "Vectorization Factor is missing");
-  assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
-
-  VPTransformState State{
-      *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()};
+  VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
   State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
   State.TripCount = ILV.getOrCreateTripCount(nullptr);
   State.CanonicalIV = ILV.Induction;
@@ -8226,7 +8228,7 @@
   //===------------------------------------------------===//
 
   // 2. Copy and widen instructions from the old loop into the new loop.
-  VPlans.front()->execute(&State);
+  BestVPlan.execute(&State);
 
   // 3. Fix the vectorized code: take care of header phi's, live-outs,
   //    predication, updating analyses.
@@ -10064,7 +10066,7 @@
       VectorizationFactor::Disabled() == VF)
     return false;
 
-  LVP.setBestPlan(VF.Width, 1);
+  VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
 
   {
     GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
@@ -10073,7 +10075,7 @@
                            &CM, BFI, PSI, Checks);
     LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
                       << L->getHeader()->getParent()->getName() << "\"\n");
-    LVP.executePlan(LB, DT);
+    LVP.executePlan(VF.Width, 1, BestPlan, LB, DT);
   }
 
   // Mark the loop as already vectorized to avoid vectorizing again.
@@ -10395,7 +10397,7 @@
                            F->getParent()->getDataLayout());
   if (!VF.Width.isScalar() || IC > 1)
     Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
-  LVP.setBestPlan(VF.Width, IC);
+  VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
 
   using namespace ore;
   if (!VectorizeLoop) {
@@ -10404,7 +10406,7 @@
     // interleave it.
     InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
                                &CM, BFI, PSI, Checks);
-    LVP.executePlan(Unroller, DT);
+    LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT);
 
     ORE->emit([&]() {
       return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10427,8 +10429,7 @@
         EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
                                            EPI, &LVL, &CM, BFI, PSI, Checks);
 
-        LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
-        LVP.executePlan(MainILV, DT);
+        LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestPlan, MainILV, DT);
         ++LoopsVectorized;
 
         simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
@@ -10436,13 +10437,16 @@
 
         // Second pass vectorizes the epilogue and adjusts the control flow
         // edges from the first pass.
-        LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
         EPI.MainLoopVF = EPI.EpilogueVF;
         EPI.MainLoopUF = EPI.EpilogueUF;
         EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
                                                  ORE, EPI, &LVL, &CM, BFI, PSI,
                                                  Checks);
-        LVP.executePlan(EpilogILV, DT);
+        // Look up the plan containing the epilogue VF rather than reusing the
+        // main loop's plan, which need not contain that VF.
+        VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF);
+        LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
+                        DT);
         ++LoopsEpilogueVectorized;
 
         if (!MainILV.areSafetyChecksAdded())
@@ -10450,7 +10454,7 @@
       } else {
         InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
                                &LVL, &CM, BFI, PSI, Checks);
-        LVP.executePlan(LB, DT);
+        LVP.executePlan(VF.Width, IC, BestPlan, LB, DT);
         ++LoopsVectorized;
 
         // Add metadata to disable runtime unrolling a scalar loop when there
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
@@ -8,9 +8,9 @@
 define i32 @foo() {
 ; CHECK-LABEL: foo
 
-; CHECK-PWR8: Setting best plan to VF=16, UF=4
+; CHECK-PWR8: Executing best plan with VF=16, UF=4
 
-; CHECK-PWR9: Setting best plan to VF=8, UF=8
+; CHECK-PWR9: Executing best plan with VF=8, UF=8
 
 
 entry:
@@ -46,7 +46,7 @@
 
 ; CHECK-LABEL: goo
 
-; CHECK: Setting best plan to VF=16, UF=4
+; CHECK: Executing best plan with VF=16, UF=4
 
 entry:
   br label %for.body
@@ -79,7 +79,7 @@
 define i64 @bar(i64* nocapture %a) {
 ; CHECK-LABEL: bar
 
-; CHECK: Setting best plan to VF=2, UF=12
+; CHECK: Executing best plan with VF=2, UF=12
 
 entry:
   br label %for.body
@@ -107,7 +107,7 @@
 define void @hoo(i32 %n) {
 ; CHECK-LABEL: hoo
 
-; CHECK: Setting best plan to VF=1, UF=12
+; CHECK: Executing best plan with VF=1, UF=12
 
 entry:
   br label %for.body