diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -278,7 +278,12 @@ OptimizationRemarkEmitter *ORE; - SmallVector VPlans; + SmallVector VPlans; + + /// Candidate pairs of VectorizationFactors and a bool indicating whether any + /// vector instructions will be generated for a VPlan. + DenseMap>> + CandidateVFs; /// A builder used to construct the current plan. VPBuilder Builder; @@ -336,6 +341,13 @@ /// Check if the number of runtime checks exceeds the threshold. bool requiresTooManyRuntimeChecks() const; + /// \return The most profitable vectorization factor and the cost of that VF. + /// This method checks every VF in every plan in VPlans. + VectorizationFactor selectVectorizationFactor(); + + VectorizationFactor + selectEpilogueVectorizationFactor(const ElementCount MaxVF); + protected: /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is @@ -370,6 +382,29 @@ void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF); + + std::optional getVScaleForTuning() const; + + /// Returns true when Factor A is more profitable than Factor B. + bool isMoreProfitable(const VectorizationFactor &A, + const VectorizationFactor &B) const; + + /// Determines if we have the infrastructure to vectorize loop \p L and its + /// epilogue, assuming the main loop is vectorized by \p VF. + bool isCandidateForEpilogueVectorization(const Loop &L, + const ElementCount VF) const; + + /// Returns true if epilogue vectorization is considered profitable, and + /// false otherwise. + /// \p VF is the vectorization factor chosen for the original loop. + bool isEpilogueVectorizationProfitable(const ElementCount VF) const; + + ArrayRef> + getCandidateVFsFor(VPlan &Plan) const { + auto I = CandidateVFs.find(&Plan); + assert(I != CandidateVFs.end()); + return I->second; + } }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1161,6 +1161,8 @@ using InstructionVFPair = std::pair; +using VectorizationCostTy = std::pair; + /// LoopVectorizationCostModel - estimates the expected speedups due to /// vectorization. /// In many cases vectorization is not profitable. This can happen because of @@ -1192,17 +1194,6 @@ /// otherwise. bool runtimeChecksRequired(); - /// \return The most profitable vectorization factor and the cost of that VF. - /// This method checks every VF in \p CandidateVFs. If UserVF is not ZERO - /// then this vectorization factor will be selected if vectorization is - /// possible. - VectorizationFactor - selectVectorizationFactor(const ElementCountSet &CandidateVFs); - - VectorizationFactor - selectEpilogueVectorizationFactor(const ElementCount MaxVF, - const LoopVectorizationPlanner &LVP); - /// Setup cost-based decisions for user vectorization factor. /// \return true if the UserVF is a feasible VF to be chosen. bool selectUserVectorizationFactor(ElementCount UserVF) { @@ -1637,6 +1628,18 @@ /// returned by the corresponding TLI method. std::optional getVScaleForTuning() const; + /// Returns the expected execution cost. The unit of the cost does + /// not matter because we use the 'cost' units to compare different + /// vector widths. The cost that is returned is *not* normalized by + /// the factor width. If \p Invalid is not nullptr, this function + /// will add a pair(Instruction*, ElementCount) to \p Invalid for + /// each instruction that has an Invalid cost for the given VF. + VectorizationCostTy + expectedCost(ElementCount VF, + SmallVectorImpl *Invalid = nullptr); + + bool hasPredStores() const { return NumPredStores > 0; } + private: unsigned NumPredStores = 0; @@ -1667,17 +1670,6 @@ /// operate on vector values after type legalization in the backend. If this /// latter value is false, then all operations will be scalarized (i.e. no /// vectorization has actually taken place). - using VectorizationCostTy = std::pair; - - /// Returns the expected execution cost. The unit of the cost does - /// not matter because we use the 'cost' units to compare different - /// vector widths. The cost that is returned is *not* normalized by - /// the factor width. If \p Invalid is not nullptr, this function - /// will add a pair(Instruction*, ElementCount) to \p Invalid for - /// each instruction that has an Invalid cost for the given VF. - VectorizationCostTy - expectedCost(ElementCount VF, - SmallVectorImpl *Invalid = nullptr); /// Returns the execution time cost of an instruction for a given vector /// width. Vector width of one means scalar. @@ -1841,16 +1833,6 @@ Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); })); } - /// Determines if we have the infrastructure to vectorize loop \p L and its - /// epilogue, assuming the main loop is vectorized by \p VF. - bool isCandidateForEpilogueVectorization(const Loop &L, - const ElementCount VF) const; - - /// Returns true if epilogue vectorization is considered profitable, and - /// false otherwise. - /// \p VF is the vectorization factor chosen for the original loop. - bool isEpilogueVectorizationProfitable(const ElementCount VF) const; - public: /// The loop that we evaluate. Loop *TheLoop; @@ -5359,57 +5341,6 @@ return TTI.getVScaleForTuning(); } -bool LoopVectorizationCostModel::isMoreProfitable( - const VectorizationFactor &A, const VectorizationFactor &B) const { - InstructionCost CostA = A.Cost; - InstructionCost CostB = B.Cost; - - unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop); - - if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) { - // If the trip count is a known (possibly small) constant, the trip count - // will be rounded up to an integer number of iterations under - // FoldTailByMasking. The total cost in that case will be - // VecCost*ceil(TripCount/VF). When not folding the tail, the total - // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be - // some extra overheads, but for the purpose of comparing the costs of - // different VFs we can use this to compare the total loop-body cost - // expected after vectorization. - auto GetCostForTC = [MaxTripCount, this](unsigned VF, - InstructionCost VectorCost, - InstructionCost ScalarCost) { - return foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF) - : VectorCost * (MaxTripCount / VF) + - ScalarCost * (MaxTripCount % VF); - }; - auto RTCostA = GetCostForTC(A.Width.getFixedValue(), CostA, A.ScalarCost); - auto RTCostB = GetCostForTC(B.Width.getFixedValue(), CostB, B.ScalarCost); - - return RTCostA < RTCostB; - } - - // Improve estimate for the vector width if it is scalable. - unsigned EstimatedWidthA = A.Width.getKnownMinValue(); - unsigned EstimatedWidthB = B.Width.getKnownMinValue(); - if (std::optional VScale = getVScaleForTuning()) { - if (A.Width.isScalable()) - EstimatedWidthA *= *VScale; - if (B.Width.isScalable()) - EstimatedWidthB *= *VScale; - } - - // Assume vscale may be larger than 1 (or the value being tuned for), - // so that scalable vectorization is slightly favorable over fixed-width - // vectorization. - if (A.Width.isScalable() && !B.Width.isScalable()) - return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA); - - // To avoid the need for FP division: - // (CostA / A.Width) < (CostB / B.Width) - // <=> (CostA * B.Width) < (CostB * A.Width) - return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA); -} - static void emitInvalidCostRemarks(SmallVector InvalidCosts, OptimizationRemarkEmitter *ORE, Loop *TheLoop) { @@ -5474,73 +5405,105 @@ } while (!Tail.empty()); } -VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( - const ElementCountSet &VFCandidates) { - InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first; - LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); - assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop"); - assert(VFCandidates.count(ElementCount::getFixed(1)) && - "Expected Scalar VF to be a candidate"); +bool LoopVectorizationPlanner::isMoreProfitable( + const VectorizationFactor &A, const VectorizationFactor &B) const { + InstructionCost CostA = A.Cost; + InstructionCost CostB = B.Cost; + + unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop); - const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost, + if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) { + // If the trip count is a known (possibly small) constant, the trip count + // will be rounded up to an integer number of iterations under + // FoldTailByMasking. The total cost in that case will be + // VecCost*ceil(TripCount/VF). When not folding the tail, the total + // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be + // some extra overheads, but for the purpose of comparing the costs of + // different VFs we can use this to compare the total loop-body cost + // expected after vectorization. + auto GetCostForTC = [MaxTripCount, this](unsigned VF, + InstructionCost VectorCost, + InstructionCost ScalarCost) { + return CM.foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF) + : VectorCost * (MaxTripCount / VF) + + ScalarCost * (MaxTripCount % VF); + }; + auto RTCostA = GetCostForTC(A.Width.getFixedValue(), CostA, A.ScalarCost); + auto RTCostB = GetCostForTC(B.Width.getFixedValue(), CostB, B.ScalarCost); + + return RTCostA < RTCostB; + } + + // Improve estimate for the vector width if it is scalable. + unsigned EstimatedWidthA = A.Width.getKnownMinValue(); + unsigned EstimatedWidthB = B.Width.getKnownMinValue(); + if (std::optional VScale = getVScaleForTuning()) { + if (A.Width.isScalable()) + EstimatedWidthA *= *VScale; + if (B.Width.isScalable()) + EstimatedWidthB *= *VScale; + } + + // Assume vscale may be larger than 1 (or the value being tuned for), + // so that scalable vectorization is slightly favorable over fixed-width + // vectorization. + if (A.Width.isScalable() && !B.Width.isScalable()) + return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA); + + // To avoid the need for FP division: + // (CostA / A.Width) < (CostB / B.Width) + // <=> (CostA * B.Width) < (CostB * A.Width) + return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA); +} + +std::optional LoopVectorizationPlanner::getVScaleForTuning() const { + Function *TheFunction = OrigLoop->getHeader()->getParent(); + if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) { + auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange); + auto Min = Attr.getVScaleRangeMin(); + auto Max = Attr.getVScaleRangeMax(); + if (Max && Min == Max) + return Max; + } + + return TTI->getVScaleForTuning(); +} +VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { + assert(!VPlans.empty()); + + ElementCount ScalarFactor = ElementCount::getFixed(1); + InstructionCost ExpectedCost = + getCandidateVFsFor(*VPlans[0])[0].first.ScalarCost; + const VectorizationFactor ScalarCost(ScalarFactor, ExpectedCost, ExpectedCost); VectorizationFactor ChosenFactor = ScalarCost; + assert(hasPlanWithVF(ElementCount::getFixed(1)) && + "Expected Scalar VF to be a candidate"); - bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; - if (ForceVectorization && VFCandidates.size() > 1) { + bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; + if (ForceVectorization) { // Ignore scalar width, because the user explicitly wants vectorization. // Initialize cost to max so that VF = 2 is, at least, chosen during cost // evaluation. ChosenFactor.Cost = InstructionCost::getMax(); } - SmallVector InvalidCosts; - for (const auto &i : VFCandidates) { - // The cost for scalar VF=1 is already calculated, so ignore it. - if (i.isScalar()) - continue; - - VectorizationCostTy C = expectedCost(i, &InvalidCosts); - VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost); + for (auto &Plan : VPlans) { + for (const auto &[Candidate, HasVec] : getCandidateVFsFor(*Plan)) { + if (Candidate.Width.isScalar()) + continue; -#ifndef NDEBUG - unsigned AssumedMinimumVscale = 1; - if (std::optional VScale = getVScaleForTuning()) - AssumedMinimumVscale = *VScale; - unsigned Width = - Candidate.Width.isScalable() - ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale - : Candidate.Width.getFixedValue(); - LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i - << " costs: " << (Candidate.Cost / Width)); - if (i.isScalable()) - LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of " - << AssumedMinimumVscale << ")"); - LLVM_DEBUG(dbgs() << ".\n"); -#endif + if (!HasVec && !ForceVectorization) { + LLVM_DEBUG( + dbgs() + << "LV: Not considering vector loop of width " << Candidate.Width + << " because it will not generate any vector instructions.\n"); + continue; + } - if (!C.second && !ForceVectorization) { - LLVM_DEBUG( - dbgs() << "LV: Not considering vector loop of width " << i - << " because it will not generate any vector instructions.\n"); - continue; + if (isMoreProfitable(Candidate, ChosenFactor)) + ChosenFactor = Candidate; } - - // If profitable add it to ProfitableVF list. - if (isMoreProfitable(Candidate, ScalarCost)) - ProfitableVFs.push_back(Candidate); - - if (isMoreProfitable(Candidate, ChosenFactor)) - ChosenFactor = Candidate; - } - - emitInvalidCostRemarks(InvalidCosts, ORE, TheLoop); - - if (!EnableCondStoresVectorization && NumPredStores) { - reportVectorizationFailure("There are conditional stores.", - "store that is conditionally executed prevents vectorization", - "ConditionalStore", ORE, TheLoop); - ChosenFactor = ScalarCost; } LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() && @@ -5551,7 +5514,7 @@ return ChosenFactor; } -bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization( +bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization( const Loop &L, ElementCount VF) const { // Cross iteration phis such as reductions need special handling and are // currently unsupported. @@ -5582,7 +5545,7 @@ return true; } -bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable( +bool LoopVectorizationPlanner::isEpilogueVectorizationProfitable( const ElementCount VF) const { // FIXME: We need a much better cost-model to take different parameters such // as register pressure, code size increase and cost of extra branches into @@ -5590,12 +5553,12 @@ // with vectorization factors larger than a certain value. // Allow the target to opt out entirely. - if (!TTI.preferEpilogueVectorization()) + if (!TTI->preferEpilogueVectorization()) return false; // We also consider epilogue vectorization unprofitable for targets that don't // consider interleaving beneficial (eg. MVE). - if (TTI.getMaxInterleaveFactor(VF) <= 1) + if (TTI->getMaxInterleaveFactor(VF) <= 1) return false; unsigned Multiplier = 1; @@ -5606,16 +5569,15 @@ return false; } -VectorizationFactor -LoopVectorizationCostModel::selectEpilogueVectorizationFactor( - const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) { +VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor( + const ElementCount MainLoopVF) { VectorizationFactor Result = VectorizationFactor::Disabled(); if (!EnableEpilogueVectorization) { LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n"); return Result; } - if (!isScalarEpilogueAllowed()) { + if (!CM.isScalarEpilogueAllowed()) { LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because no " "epilogue is allowed.\n"); return Result; @@ -5623,7 +5585,7 @@ // Not really a cost consideration, but check for unsupported cases here to // simplify the logic. - if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) { + if (!isCandidateForEpilogueVectorization(*OrigLoop, MainLoopVF)) { LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because the loop " "is not a supported candidate.\n"); return Result; @@ -5632,7 +5594,7 @@ if (EpilogueVectorizationForceVF > 1) { LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n"); ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF); - if (LVP.hasPlanWithVF(ForcedEC)) + if (hasPlanWithVF(ForcedEC)) return {ForcedEC, 0, 0}; else { LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization forced factor is not " @@ -5641,8 +5603,8 @@ } } - if (TheLoop->getHeader()->getParent()->hasOptSize() || - TheLoop->getHeader()->getParent()->hasMinSize()) { + if (OrigLoop->getHeader()->getParent()->hasOptSize() || + OrigLoop->getHeader()->getParent()->hasMinSize()) { LLVM_DEBUG( dbgs() << "LEV: Epilogue vectorization skipped due to opt for size.\n"); return Result; @@ -5664,13 +5626,16 @@ EstimatedRuntimeVF *= *VScale; } - for (auto &NextVF : ProfitableVFs) - if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() && - ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) || - ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) && - (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) && - LVP.hasPlanWithVF(NextVF.Width)) - Result = NextVF; + for (auto &VPlan : VPlans) { + for (const auto &[NextVF, _] : getCandidateVFsFor(*VPlan)) { + assert(VPlan->hasVF(NextVF.Width) && "VF not in plan"); + if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() && + ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) || + ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) && + (Result.Width.isScalar() || isMoreProfitable(NextVF, Result))) + Result = NextVF; + } + } if (Result != VectorizationFactor::Disabled()) LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = " @@ -6370,8 +6335,7 @@ return Discount; } -LoopVectorizationCostModel::VectorizationCostTy -LoopVectorizationCostModel::expectedCost( +VectorizationCostTy LoopVectorizationCostModel::expectedCost( ElementCount VF, SmallVectorImpl *Invalid) { VectorizationCostTy Cost; @@ -6823,7 +6787,7 @@ return getWideningCost(I, VF); } -LoopVectorizationCostModel::VectorizationCostTy +VectorizationCostTy LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF) { // If we know that this instruction will remain uniform, check the cost of @@ -7630,7 +7594,7 @@ return VectorizationFactor::Disabled(); // Select the optimal vectorization factor. - VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates); + VectorizationFactor VF = selectVectorizationFactor(); assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); if (!hasPlanWithVF(VF.Width)) { LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width @@ -7646,7 +7610,7 @@ 1 && "Best VF has not a single VPlan."); - for (const VPlanPtr &Plan : VPlans) { + for (const auto &Plan : VPlans) { if (Plan->hasVF(VF)) return *Plan.get(); } @@ -8102,7 +8066,9 @@ auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; - VPlans.push_back(buildVPlan(SubRange)); + VPlans.emplace_back(buildVPlan(SubRange)); + CandidateVFs[&*VPlans.back()] = + SmallVector>(); VF = SubRange.End; } } @@ -8719,13 +8685,52 @@ auto &ConditionalAssumes = Legal->getConditionalAssumes(); DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end()); + InstructionCost ScalarCost = CM.expectedCost(ElementCount::getFixed(1)).first; + LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n"); + + SmallVector InvalidCosts; auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; - if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions)) - VPlans.push_back(std::move(*Plan)); + auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions); + if (!Plan) { + VF = SubRange.End; + continue; + } + SmallVector> Costs; + for (ElementCount CostVF = VF; + ElementCount::isKnownLT(CostVF, SubRange.End); CostVF *= 2) { + auto [VecCost, IsVec] = CM.expectedCost(CostVF, &InvalidCosts); + Costs.emplace_back(VectorizationFactor(CostVF, VecCost, ScalarCost), + IsVec); +#ifndef NDEBUG + unsigned AssumedMinimumVscale = 1; + if (std::optional VScale = getVScaleForTuning()) + AssumedMinimumVscale = *VScale; + unsigned Width = CostVF.isScalable() + ? CostVF.getKnownMinValue() * AssumedMinimumVscale + : CostVF.getFixedValue(); + LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << CostVF + << " costs: " << (VecCost / Width)); + if (CostVF.isScalable()) + LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of " + << AssumedMinimumVscale << ")"); + LLVM_DEBUG(dbgs() << ".\n"); +#endif + } + VPlans.emplace_back(std::move(*Plan)); + CandidateVFs[&*VPlans.back()] = Costs; VF = SubRange.End; } + emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop); + + if (!EnableCondStoresVectorization && CM.hasPredStores()) { + reportVectorizationFailure( + "There are conditional stores.", + "store that is conditionally executed prevents vectorization", + "ConditionalStore", ORE, OrigLoop); + VPlans.clear(); + } } // Add the necessary canonical IV and branch recipes required to control the @@ -10389,7 +10394,7 @@ // Consider vectorizing the epilogue too if it's profitable. VectorizationFactor EpilogueVF = - CM.selectEpilogueVectorizationFactor(VF.Width, LVP); + LVP.selectEpilogueVectorizationFactor(VF.Width); if (EpilogueVF.Width.isVector()) { // The first pass vectorizes the main loop and creates a scalar epilogue diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2320,6 +2320,8 @@ UFs.insert(UF); } + const SmallSetVector getVFs() const { return VFs; } + /// Return a string with the name of the plan and the applicable VFs and UFs. std::string getName() const; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -7,7 +7,7 @@ ; CHECK-LABEL: all_scalar ; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 -; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions +; CHECK: LV: Selecting VF: 1 ; define void @all_scalar(ptr %a, i64 %n) { entry: