Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1625,11 +1625,17 @@
 private:
   unsigned NumPredStores = 0;
 
+  /// \return None if UserVF is zero or is a scalable VF the target does not
+  /// support; otherwise UserVF clamped to the maximum safe VF. For scalable
+  /// UserVF, the resulting feasible VF may be a fixed-width VF.
+  Optional<ElementCount> computeFeasibleUserVF(ElementCount UserVF,
+                                               unsigned WidestType);
+
   /// \return An upper bound for the vectorization factor, a power-of-2 larger
   /// than zero. One is returned if vectorization should best be avoided due
   /// to cost.
   ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
-                                    ElementCount UserVF);
+                                    unsigned SmallestType, unsigned WidestType);
 
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
@@ -5505,9 +5511,23 @@
     return None;
   }
 
+  MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
+  unsigned SmallestType, WidestType;
+  std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
+
+  auto GetFeasibleMaxVF = [&]() -> ElementCount {
+    // First analyze the UserVF, fall back if the UserVF should be ignored.
+    Optional<ElementCount> MaybeMaxVF =
+        computeFeasibleUserVF(UserVF, WidestType);
+    if (!MaybeMaxVF)
+      MaybeMaxVF = computeFeasibleMaxVF(TC, SmallestType, WidestType);
+
+    return MaybeMaxVF.getValue();
+  };
+
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-    return computeFeasibleMaxVF(TC, UserVF);
+    return GetFeasibleMaxVF();
   case CM_ScalarEpilogueNotAllowedUsePredicate:
     LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5545,7 +5565,7 @@
       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                            "scalar epilogue instead.\n");
       ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-      return computeFeasibleMaxVF(TC, UserVF);
+      return GetFeasibleMaxVF();
     }
     return None;
   }
@@ -5562,7 +5582,7 @@
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
+  ElementCount MaxVF = GetFeasibleMaxVF();
   assert(!MaxVF.isScalable() &&
          "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
@@ -5623,97 +5643,91 @@
   return None;
 }
 
-ElementCount
-LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
-                                                 ElementCount UserVF) {
-  bool IgnoreScalableUserVF = UserVF.isScalable() &&
-                              !TTI.supportsScalableVectors() &&
-                              !ForceTargetSupportsScalableVectors;
-  if (IgnoreScalableUserVF) {
-    LLVM_DEBUG(
-        dbgs() << "LV: Ignoring VF=" << UserVF
-               << " because target does not support scalable vectors.\n");
-    ORE->emit([&]() {
-      return OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreScalableUserVF",
-                                        TheLoop->getStartLoc(),
-                                        TheLoop->getHeader())
-             << "Ignoring VF=" << ore::NV("UserVF", UserVF)
-             << " because target does not support scalable vectors.";
-    });
-  }
-
-  // Beyond this point two scenarios are handled. If UserVF isn't specified
-  // then a suitable VF is chosen. If UserVF is specified and there are
-  // dependencies, check if it's legal. However, if a UserVF is specified and
-  // there are no dependencies, then there's nothing to do.
-  if (UserVF.isNonZero() && !IgnoreScalableUserVF &&
-      Legal->isSafeForAnyVectorWidth())
-    return UserVF;
+Optional<ElementCount>
+LoopVectorizationCostModel::computeFeasibleUserVF(ElementCount UserVF,
+                                                  unsigned WidestType) {
+  if (!UserVF.isNonZero())
+    return None;
 
-  MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
-  unsigned SmallestType, WidestType;
-  std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
-  unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+  if (UserVF.isScalable() && !TTI.supportsScalableVectors() &&
+      !ForceTargetSupportsScalableVectors) {
+    OptimizationRemarkAnalysis R(DEBUG_TYPE, "IgnoreScalableUserVF",
+                                 TheLoop->getStartLoc(), TheLoop->getHeader());
+    R << "Ignoring VF=" << ore::NV("UserVF", UserVF)
+      << " because target does not support scalable vectors.";
+    LLVM_DEBUG(dbgs() << "LV: " << R.getMsg() << "\n");
+    ORE->emit(R);
+    return None;
+  }
 
   // Get the maximum safe dependence distance in bits computed by LAA.
   // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
   // the memory accesses that is most restrictive (involved in the smallest
   // dependence distance).
   unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
+  unsigned MaxSafeElements =
+      PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
 
-  // If the user vectorization factor is legally unsafe, clamp it to a safe
-  // value. Otherwise, return as is.
-  if (UserVF.isNonZero() && !IgnoreScalableUserVF) {
-    unsigned MaxSafeElements =
-        PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
-    ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
-
-    if (UserVF.isScalable()) {
-      Optional<unsigned> MaxVScale = TTI.getMaxVScale();
-
-      // Scale VF by vscale before checking if it's safe.
-      MaxSafeVF = ElementCount::getScalable(
-          MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
-
-      if (MaxSafeVF.isZero()) {
-        // The dependence distance is too small to use scalable vectors,
-        // fallback on fixed.
-        LLVM_DEBUG(
-            dbgs()
-            << "LV: Max legal vector width too small, scalable vectorization "
-               "unfeasible. Using fixed-width vectorization instead.\n");
-        ORE->emit([&]() {
-          return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
-                                            TheLoop->getStartLoc(),
-                                            TheLoop->getHeader())
-                 << "Max legal vector width too small, scalable vectorization "
-                 << "unfeasible. Using fixed-width vectorization instead.";
-        });
-        return computeFeasibleMaxVF(
-            ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue()));
-      }
-    }
+  // With no dependences any UserVF is legal, so there is nothing to clamp.
+  if (Legal->isSafeForAnyVectorWidth())
+    return UserVF;
 
-    LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
+  ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
 
-    if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
-      return UserVF;
+  if (UserVF.isScalable()) {
+    Optional<unsigned> MaxVScale = TTI.getMaxVScale();
 
-    LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
-                      << " is unsafe, clamping to max safe VF=" << MaxSafeVF
-                      << ".\n");
-    ORE->emit([&]() {
-      return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
-                                        TheLoop->getStartLoc(),
-                                        TheLoop->getHeader())
-             << "User-specified vectorization factor "
-             << ore::NV("UserVectorizationFactor", UserVF)
-             << " is unsafe, clamping to maximum safe vectorization factor "
-             << ore::NV("VectorizationFactor", MaxSafeVF);
-    });
-    return MaxSafeVF;
+    // Scale VF by vscale before checking if it's safe.
+    MaxSafeVF = ElementCount::getScalable(
+        MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
+
+    if (MaxSafeVF.isZero()) {
+      // The dependence distance is too small to use scalable vectors,
+      // fallback on fixed.
+      LLVM_DEBUG(
+          dbgs()
+          << "LV: Max legal vector width too small, scalable vectorization "
+             "unfeasible. Using fixed-width vectorization instead.\n");
+      ORE->emit([&]() {
+        return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
+                                          TheLoop->getStartLoc(),
+                                          TheLoop->getHeader())
+               << "Max legal vector width too small, scalable vectorization "
+               << "unfeasible. Using fixed-width vectorization instead.";
+      });
+      return computeFeasibleUserVF(
+          ElementCount::getFixed(UserVF.getKnownMinValue()), WidestType);
+    }
   }
 
+  LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
+  if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
+    return UserVF;
+
+  LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+                    << " is unsafe, clamping to max safe VF=" << MaxSafeVF
+                    << ".\n");
+  ORE->emit([&]() {
+    return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+                                      TheLoop->getStartLoc(),
+                                      TheLoop->getHeader())
+           << "User-specified vectorization factor "
+           << ore::NV("UserVectorizationFactor", UserVF)
+           << " is unsafe, clamping to maximum safe vectorization factor "
+           << ore::NV("VectorizationFactor", MaxSafeVF);
+  });
+  return MaxSafeVF;
+}
+
+ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(
+    unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType) {
+  // Get the maximum safe dependence distance in bits computed by LAA.
+  // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
+  // the memory accesses that is most restrictive (involved in the smallest
+  // dependence distance).
+  unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
+
+  unsigned WidestRegister = TTI.getRegisterBitWidth(true);
   WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits);
 
   // Ensure MaxVF is a power of 2; the dependence distance bound may not be.