diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1065,7 +1065,7 @@ /// \return An upper bound for the vectorization factor, or None if /// vectorization and interleaving should be avoided up front. - Optional<unsigned> computeMaxVF(unsigned UserVF, unsigned UserIC); + Optional<ElementCount> computeMaxVF(ElementCount UserVF, unsigned UserIC); /// \return True if runtime checks are required for vectorization, and false /// otherwise. @@ -1442,7 +1442,7 @@ /// \return An upper bound for the vectorization factor, a power-of-2 larger /// than zero. One is returned if vectorization should best be avoided due /// to cost. - unsigned computeFeasibleMaxVF(unsigned ConstTripCount); + ElementCount computeFeasibleMaxVF(unsigned ConstTripCount); /// The vectorization cost is a combination of the cost itself and a boolean /// indicating whether any of the contributing operations will actually @@ -5214,8 +5214,8 @@ return false; } -Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF, - unsigned UserIC) { +Optional<ElementCount> +LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { // TODO: It may by useful to do since it's still likely to be dynamically // uniform if the target can skip. @@ -5273,9 +5273,13 @@ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } - unsigned MaxVF = UserVF ? UserVF : computeFeasibleMaxVF(TC); - assert((UserVF || isPowerOf2_32(MaxVF)) && "MaxVF must be a power of 2"); - unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF; + ElementCount MaxVF = UserVF ? 
UserVF : computeFeasibleMaxVF(TC); + assert(!MaxVF.isScalable() && + "Scalable vectors do not yet support tail folding"); + assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) && + "MaxVF must be a power of 2"); + unsigned MaxVFtimesIC = + UserIC ? MaxVF.getFixedValue() * UserIC : MaxVF.getFixedValue(); if (TC > 0 && TC % MaxVFtimesIC == 0) { // Accept MaxVF if we do not have a tail. LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); @@ -5321,7 +5325,7 @@ return None; } -unsigned +ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; @@ -5350,7 +5354,7 @@ if (MaxVectorSize == 0) { LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n"); MaxVectorSize = 1; - return MaxVectorSize; + return ElementCount::getFixed(MaxVectorSize); } else if (ConstTripCount && ConstTripCount < MaxVectorSize && isPowerOf2_32(ConstTripCount)) { // We need to clamp the VF to be the ConstTripCount. There is no point in @@ -5358,7 +5362,7 @@ LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " << ConstTripCount << "\n"); MaxVectorSize = ConstTripCount; - return MaxVectorSize; + return ElementCount::getFixed(MaxVectorSize); } unsigned MaxVF = MaxVectorSize; @@ -5396,7 +5400,7 @@ } } } - return MaxVF; + return ElementCount::getFixed(MaxVF); } VectorizationFactor @@ -6984,8 +6988,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { assert(!UserVF.isScalable() && "scalable vectorization not yet handled"); assert(OrigLoop->isInnermost() && "Inner loop expected."); - Optional<unsigned> MaybeMaxVF = - CM.computeMaxVF(UserVF.getKnownMinValue(), UserIC); + Optional<ElementCount> MaybeMaxVF = CM.computeMaxVF(UserVF, UserIC); if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved. 
return None; @@ -7016,7 +7019,7 @@ return {{UserVF, 0}}; } - ElementCount MaxVF = ElementCount::getFixed(MaybeMaxVF.getValue()); + ElementCount MaxVF = MaybeMaxVF.getValue(); assert(MaxVF.isNonZero() && "MaxVF is zero."); for (ElementCount VF = ElementCount::getFixed(1);