[LoopVectorize] Represent the maximum vectorization factor as an ElementCount.

LoopVectorizationCostModel::computeMaxVF() now takes the user-requested VF as
an ElementCount and returns Optional<ElementCount>; computeFeasibleMaxVF()
returns an ElementCount built with ElementCount::getFixed(). The tail-folding
path in computeMaxVF() asserts that the chosen MaxVF is not scalable before
reading its fixed value. LoopVectorizationPlanner::plan() passes UserVF
through unchanged and uses the returned ElementCount directly.

"Is a user VF set?" is tested with UserVF.isNonZero() throughout, rather than
by using the ElementCount itself as a boolean condition.

Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1061,7 +1061,7 @@
 
   /// \return An upper bound for the vectorization factor, or None if
   /// vectorization and interleaving should be avoided up front.
-  Optional<unsigned> computeMaxVF(unsigned UserVF, unsigned UserIC);
+  Optional<ElementCount> computeMaxVF(ElementCount UserVF, unsigned UserIC);
 
   /// \return True if runtime checks are required for vectorization, and false
   /// otherwise.
@@ -1438,7 +1438,7 @@
   /// \return An upper bound for the vectorization factor, a power-of-2 larger
   /// than zero. One is returned if vectorization should best be avoided due
   /// to cost.
-  unsigned computeFeasibleMaxVF(unsigned ConstTripCount);
+  ElementCount computeFeasibleMaxVF(unsigned ConstTripCount);
 
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
@@ -5208,8 +5208,8 @@
   return false;
 }
 
-Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
-                                                            unsigned UserIC) {
+Optional<ElementCount>
+LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
   if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
     // TODO: It may by useful to do since it's still likely to be dynamically
     // uniform if the target can skip.
@@ -5267,9 +5267,13 @@
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
-  unsigned MaxVF = UserVF ? UserVF : computeFeasibleMaxVF(TC);
-  assert((UserVF || isPowerOf2_32(MaxVF)) && "MaxVF must be a power of 2");
-  unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF;
+  ElementCount MaxVF = UserVF.isNonZero() ? UserVF : computeFeasibleMaxVF(TC);
+  assert(!MaxVF.isScalable() &&
+         "Scalable vectors do not yet support tail folding");
+  assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
+         "MaxVF must be a power of 2");
+  unsigned MaxVFtimesIC =
+      UserIC ? MaxVF.getFixedValue() * UserIC : MaxVF.getFixedValue();
   if (TC > 0 && TC % MaxVFtimesIC == 0) {
     // Accept MaxVF if we do not have a tail.
     LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
@@ -5315,7 +5319,7 @@
   return None;
 }
 
-unsigned
+ElementCount
 LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
   MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
   unsigned SmallestType, WidestType;
@@ -5344,7 +5348,7 @@
   if (MaxVectorSize == 0) {
     LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n");
     MaxVectorSize = 1;
-    return MaxVectorSize;
+    return ElementCount::getFixed(MaxVectorSize);
   } else if (ConstTripCount && ConstTripCount < MaxVectorSize &&
              isPowerOf2_32(ConstTripCount)) {
     // We need to clamp the VF to be the ConstTripCount. There is no point in
@@ -5352,7 +5356,7 @@
     LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
                       << ConstTripCount << "\n");
     MaxVectorSize = ConstTripCount;
-    return MaxVectorSize;
+    return ElementCount::getFixed(MaxVectorSize);
   }
 
   unsigned MaxVF = MaxVectorSize;
@@ -5390,7 +5394,7 @@
       }
     }
   }
-  return MaxVF;
+  return ElementCount::getFixed(MaxVF);
 }
 
 VectorizationFactor
@@ -6969,8 +6973,7 @@
 LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
   assert(!UserVF.isScalable() && "scalable vectorization not yet handled");
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
-  Optional<unsigned> MaybeMaxVF =
-      CM.computeMaxVF(UserVF.getKnownMinValue(), UserIC);
+  Optional<ElementCount> MaybeMaxVF = CM.computeMaxVF(UserVF, UserIC);
   if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
     return None;
 
@@ -7001,7 +7004,7 @@
     return {{UserVF, 0}};
   }
 
-  ElementCount MaxVF = ElementCount::getFixed(MaybeMaxVF.getValue());
+  ElementCount MaxVF = MaybeMaxVF.getValue();
   assert(MaxVF.isNonZero() && "MaxVF is zero.");
 
   for (ElementCount VF = ElementCount::getFixed(1);