Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -234,6 +234,9 @@ explicit operator bool() const { return FixedVF || ScalableVF; } bool hasFixedVF() const { return FixedVF.hasValue(); } bool hasScalableVF() const { return ScalableVF.hasValue(); } + bool isOnlyScalar() const { + return FixedVF && FixedVF->isScalar() && !ScalableVF; + } Optional<ElementCount> getFixedVF() const { return FixedVF; } Optional<ElementCount> getScalableVF() const { return ScalableVF; } }; Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1263,7 +1263,8 @@ /// then this vectorization factor will be selected if vectorization is /// possible. VectorizationFactor - selectVectorizationFactor(const MinMaxVFCandidates &MaxVF); + selectVectorizationFactor(const MinMaxVFCandidates &MinVFs, + const MinMaxVFCandidates &MaxVFs); VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, const LoopVectorizationPlanner &LVP); @@ -5606,6 +5607,8 @@ // First analyze the UserVF, fall back if the UserVF should be ignored. if (auto MaybeMaxVF = computeFeasibleUserVF(UserVF, WidestType)) Result.addMaxVF(*MaybeMaxVF); + else if (Hints->isForcedScalable() && TTI.supportsScalableVectors()) + Result.addMaxVF(ElementCount::getFixed(1)); else Result.addMaxVF(computeFeasibleMaxVF(TC, SmallestType, WidestType, /*ComputeMaxScalableVF=*/false)); @@ -5936,15 +5939,8 @@ } VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( + const MinMaxVFCandidates &MinFactors, const MinMaxVFCandidates &MaxFactors) { - // Ignore the Scalable MaxVF in decision making process for now. 
- ElementCount MaxVF = *MaxFactors.getFixedVF(); - - // FIXME: This can be fixed for scalable vectors later, because at this stage - // the LoopVectorizer will only consider vectorizing a loop with scalable - // vectors when the loop has a hint to enable vectorization for a given VF. - assert(!MaxVF.isScalable() && "scalable vectors not yet supported"); - InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first; LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop"); @@ -5954,21 +5950,22 @@ float Cost = ScalarCost; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; - if (ForceVectorization && MaxVF.isVector()) { + if (ForceVectorization && !MaxFactors.isOnlyScalar()) { // Ignore scalar width, because the user explicitly wants vectorization. // Initialize cost to max so that VF = 2 is, at least, chosen during cost // evaluation. Cost = std::numeric_limits<float>::max(); } - for (auto i = ElementCount::getFixed(2); ElementCount::isKnownLE(i, MaxVF); - i *= 2) { + SmallVector<ElementCount> VFCandidates; + genFeasibleVFCandidates(*this, VFCandidates, MinFactors, MaxFactors); + for (const auto &i : VFCandidates) { // Notice that the vector loop needs to be executed less times, so // we need to divide the cost of the vector loops by the width of // the vector elements. 
VectorizationCostTy C = expectedCost(i); assert(C.first.isValid() && "Unexpected invalid cost for vector loop"); - float VectorCost = *C.first.getValue() / (float)i.getFixedValue(); + float VectorCost = *C.first.getValue() / (float)i.getKnownMinValue(); LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { @@ -7617,12 +7614,7 @@ } } - unsigned N; - if (isScalarAfterVectorization(I, VF)) { - assert(!VF.isScalable() && "VF is assumed to be non scalable"); - N = VF.getKnownMinValue(); - } else - N = 1; + unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); } @@ -7827,23 +7819,18 @@ assert(MaxUserVF.isNonZero() && "MaxUserVF is zero."); bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF); - if (!UserVF.isZero() && - (UserVFIsLegal || (UserVF.isScalable() && MaxUserVF.isScalable()))) { - // FIXME: MaxUserVF is temporarily used inplace of UserVF for illegal - // scalable VFs here, this should be reverted to only use legal UserVFs once - // the loop below supports scalable VFs. - ElementCount VF = UserVFIsLegal ? UserVF : MaxUserVF; + if (!UserVF.isZero() && UserVFIsLegal) { LLVM_DEBUG(dbgs() << "LV: Using " << (UserVFIsLegal ? "user" : "max") - << " VF " << VF << ".\n"); - assert(isPowerOf2_32(VF.getKnownMinValue()) && + << " VF " << UserVF << ".\n"); + assert(isPowerOf2_32(UserVF.getKnownMinValue()) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. 
- CM.selectUserVectorizationFactor(VF); + CM.selectUserVectorizationFactor(UserVF); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes({VF}, {VF}); + buildVPlansWithVPRecipes({UserVF}, {UserVF}); LLVM_DEBUG(printPlans(dbgs())); - return {{VF, 0}}; + return {{UserVF, 0}}; } MinMaxVFCandidates MinFactors(ElementCount::getFixed(1), @@ -7866,16 +7853,13 @@ buildVPlansWithVPRecipes(MinFactors, MaxFactors); LLVM_DEBUG(printPlans(dbgs())); - if (Optional<ElementCount> FixedMaxVF = MaxFactors.getFixedVF()) - if (FixedMaxVF->isScalar()) { - assert((!MaxFactors.hasScalableVF() || - MaxFactors.getScalableVF()->isScalar()) && - "Unexpected max scalable VF"); - return VectorizationFactor::Disabled(); - } + if (MaxFactors.isOnlyScalar()) + return VectorizationFactor::Disabled(); // Select the optimal vectorization factor. - return CM.selectVectorizationFactor(MaxFactors); + MinFactors = MinMaxVFCandidates(ElementCount::getFixed(2), + ElementCount::getScalable(1)); + return CM.selectVectorizationFactor(MinFactors, MaxFactors); } void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) { Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll @@ -10,9 +10,13 @@ define void @test0(i32* %a, i8* %b, i32* %c) { ; CHECK: LV: Checking a loop in "test0" ; CHECK_ON: LV: Found feasible scalable VF = vscale x 4 +; CHECK_ON: LV: Selecting VF: vscale x 4. ; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 4 +; CHECK_ALWAYSON: LV: Selecting VF: vscale x 4. ; CHECK_DISABLED-NOT: LV: Found feasible scalable VF +; CHECK_DISABLED: LV: Selecting VF: 1 ; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 16 +; CHECK_MAXBW: LV: Selecting VF: vscale x 16. 
entry: br label %loop @@ -38,9 +42,13 @@ define void @test1(i32* %a, i8* %b) { ; CHECK: LV: Checking a loop in "test1" ; CHECK_ON: LV: Found feasible scalable VF = vscale x 4 +; CHECK_ON: LV: Selecting VF: vscale x 4. ; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 4 +; CHECK_ALWAYSON: LV: Selecting VF: vscale x 4. ; CHECK_DISABLED-NOT: LV: Found feasible scalable VF +; CHECK_DISABLED: LV: Selecting VF: 1 ; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 4 +; CHECK_MAXBW: LV: Selecting VF: 16. entry: br label %loop @@ -68,9 +76,13 @@ define void @test2(i32* %a, i8* %b) { ; CHECK: LV: Checking a loop in "test2" ; CHECK_ON: LV: Found feasible scalable VF = vscale x 2 +; CHECK_ON: LV: Selecting VF: vscale x 2. ; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 2 +; CHECK_ALWAYSON: LV: Selecting VF: vscale x 2. ; CHECK_DISABLED-NOT: LV: Found feasible scalable VF +; CHECK_DISABLED: LV: Selecting VF: 1 ; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 2 +; CHECK_MAXBW: LV: Selecting VF: 16. entry: br label %loop @@ -98,9 +110,13 @@ define void @test3(i32* %a, i8* %b) { ; CHECK: LV: Checking a loop in "test3" ; CHECK_ON: LV: Found feasible scalable VF = vscale x 1 +; CHECK_ON: LV: Selecting VF: 1. ; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 1 +; CHECK_ALWAYSON: LV: Selecting VF: 1. ; CHECK_DISABLED-NOT: LV: Found feasible scalable VF +; CHECK_DISABLED: LV: Selecting VF: 1 ; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 1 +; CHECK_MAXBW: LV: Selecting VF: 16. entry: br label %loop Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -179,7 +179,7 @@ ; CHECK-DBG: LV: The max safe VF is: vscale x 2. ; CHECK-DBG: LV: User VF=vscale x 4 is unsafe, clamping to max safe VF=vscale x 2. 
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe, clamping to maximum safe vectorization factor vscale x 2 -; CHECK-DBG: LV: Using max VF vscale x 2 +; CHECK-DBG: LV: Selecting VF: vscale x 2. ; CHECK-LABEL: @test4 ; CHECK: define void @test4(i32* %a, i32* %b) { @@ -274,7 +274,7 @@ ; CHECK-DBG: LV: The max safe VF is: vscale x 8. ; CHECK-DBG: LV: User VF=vscale x 16 is unsafe, clamping to max safe VF=vscale x 8. ; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe, clamping to maximum safe vectorization factor vscale x 8 -; CHECK-DBG: LV: Using max VF vscale x 8 +; CHECK-DBG: LV: Selecting VF: vscale x 8. ; CHECK-LABEL: @test6 ; CHECK: define void @test6(i32* %a, i32* %b) {