Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1605,11 +1605,16 @@ private: unsigned NumPredStores = 0; + /// \return UserVF directly if it is valid. Otherwise clamp UserVF to the + /// largest valid value. + Optional<ElementCount> getFeasibleUserVF(ElementCount UserVF, + unsigned MaxSafeElements); + /// \return An upper bound for the vectorization factor, a power-of-2 larger /// than zero. One is returned if vectorization should best be avoided due /// to cost. ElementCount computeFeasibleMaxVF(unsigned ConstTripCount, - ElementCount UserVF); + unsigned SmallestType, unsigned WidestType); /// The vectorization cost is a combination of the cost itself and a boolean /// indicating whether any of the contributing operations will actually @@ -5549,9 +5554,27 @@ return None; } + auto GetFeasibleMaxVF = [&]() -> ElementCount { + MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); + unsigned SmallestType, WidestType; + std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); + + // Get the maximum safe dependence distance in bits computed by LAA. + // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from + // the memory accesses that is most restrictive (involved in the smallest + // dependence distance). + unsigned MaxSafeElements = + PowerOf2Floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); + + // First analyze the UserVF, fall back if the UserVF should be ignored. 
+ if (auto MaybeMaxVF = getFeasibleUserVF(UserVF, MaxSafeElements)) + return MaybeMaxVF.getValue(); + return computeFeasibleMaxVF(TC, SmallestType, WidestType); + }; + switch (ScalarEpilogueStatus) { case CM_ScalarEpilogueAllowed: - return computeFeasibleMaxVF(TC, UserVF); + return GetFeasibleMaxVF(); case CM_ScalarEpilogueNotAllowedUsePredicate: LLVM_FALLTHROUGH; case CM_ScalarEpilogueNotNeededUsePredicate: @@ -5589,7 +5612,7 @@ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a " "scalar epilogue instead.\n"); ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - return computeFeasibleMaxVF(TC, UserVF); + return GetFeasibleMaxVF(); } return None; } @@ -5606,7 +5629,7 @@ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } - ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); + ElementCount MaxVF = GetFeasibleMaxVF(); assert(!MaxVF.isScalable() && "Scalable vectors do not yet support tail folding"); assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) && @@ -5668,110 +5691,96 @@ return None; } -ElementCount -LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount, - ElementCount UserVF) { - bool IgnoreScalableUserVF = UserVF.isScalable() && - !TTI.supportsScalableVectors() && - !ForceTargetSupportsScalableVectors; - if (IgnoreScalableUserVF) { - LLVM_DEBUG( - dbgs() << "LV: Ignoring VF=" << UserVF - << " because target does not support scalable vectors.\n"); - ORE->emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreScalableUserVF", - TheLoop->getStartLoc(), - TheLoop->getHeader()) - << "Ignoring VF=" << ore::NV("UserVF", UserVF) - << " because target does not support scalable vectors."; - }); +Optional<ElementCount> +LoopVectorizationCostModel::getFeasibleUserVF(ElementCount UserVF, + unsigned MaxSafeElements) { + if (UserVF.isZero()) + return None; + + if (UserVF.isScalable() && !TTI.supportsScalableVectors() && + !ForceTargetSupportsScalableVectors) { + OptimizationRemarkAnalysis R(DEBUG_TYPE, 
"IgnoreScalableUserVF", + TheLoop->getStartLoc(), TheLoop->getHeader()); + R << "Ignoring VF=" << ore::NV("UserVF", UserVF) + << " because target does not support scalable vectors."; + LLVM_DEBUG(dbgs() << "LV: " << R.getMsg() << "\n"); + ORE->emit(R); + return None; } - // Beyond this point two scenarios are handled. If UserVF isn't specified - // then a suitable VF is chosen. If UserVF is specified and there are - // dependencies, check if it's legal. However, if a UserVF is specified and - // there are no dependencies, then there's nothing to do. - if (UserVF.isNonZero() && !IgnoreScalableUserVF) { - if (!canVectorizeReductions(UserVF)) { - reportVectorizationFailure( - "LV: Scalable vectorization not supported for the reduction " - "operations found in this loop. Using fixed-width " - "vectorization instead.", - "Scalable vectorization not supported for the reduction operations " - "found in this loop. Using fixed-width vectorization instead.", - "ScalableVFUnfeasible", ORE, TheLoop); - return computeFeasibleMaxVF( - ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue())); - } + if (!canVectorizeReductions(UserVF)) { + reportVectorizationFailure( + "LV: Scalable vectorization not supported for the reduction " + "operations found in this loop. Using fixed-width " + "vectorization instead.", + "Scalable vectorization not supported for the reduction operations " + "found in this loop. Using fixed-width vectorization instead.", + "ScalableVFUnfeasible", ORE, TheLoop); + // FIXME: The UserVF should actually be ignored in this case. + UserVF = ElementCount::getFixed(UserVF.getKnownMinValue()); + } + + // If UserVF is specified and there are no dependencies, no need to check + // if the UserVF is legal. + if (Legal->isSafeForAnyVectorWidth()) + return UserVF; + + ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements); + + if (UserVF.isScalable()) { + Optional<unsigned> MaxVScale = TTI.getMaxVScale(); + + // Scale VF by vscale before checking if it's safe. 
+ MaxSafeVF = ElementCount::getScalable( + MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0); - if (Legal->isSafeForAnyVectorWidth()) - return UserVF; + if (MaxSafeVF.isZero()) { + // The dependence distance is too small to use scalable vectors, + // fallback on fixed. + LLVM_DEBUG( + dbgs() + << "LV: Max legal vector width too small, scalable vectorization " + "unfeasible. Using fixed-width vectorization instead.\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible", + TheLoop->getStartLoc(), + TheLoop->getHeader()) + << "Max legal vector width too small, scalable vectorization " + << "unfeasible. Using fixed-width vectorization instead."; + }); + return getFeasibleUserVF( + ElementCount::getFixed(UserVF.getKnownMinValue()), MaxSafeElements); + } } - MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); - unsigned SmallestType, WidestType; - std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); - unsigned WidestRegister = TTI.getRegisterBitWidth(true); + LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n"); + if (ElementCount::isKnownLE(UserVF, MaxSafeVF)) + return UserVF; + + LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF + << " is unsafe, clamping to max safe VF=" << MaxSafeVF + << ".\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor", + TheLoop->getStartLoc(), + TheLoop->getHeader()) + << "User-specified vectorization factor " + << ore::NV("UserVectorizationFactor", UserVF) + << " is unsafe, clamping to maximum safe vectorization factor " + << ore::NV("VectorizationFactor", MaxSafeVF); + }); + return MaxSafeVF; +} +ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF( + unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType) { // Get the maximum safe dependence distance in bits computed by LAA. 
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from // the memory accesses that is most restrictive (involved in the smallest // dependence distance). unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits(); - // If the user vectorization factor is legally unsafe, clamp it to a safe - // value. Otherwise, return as is. - if (UserVF.isNonZero() && !IgnoreScalableUserVF) { - unsigned MaxSafeElements = - PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType); - ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements); - - if (UserVF.isScalable()) { - Optional<unsigned> MaxVScale = TTI.getMaxVScale(); - - // Scale VF by vscale before checking if it's safe. - MaxSafeVF = ElementCount::getScalable( - MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0); - - if (MaxSafeVF.isZero()) { - // The dependence distance is too small to use scalable vectors, - // fallback on fixed. - LLVM_DEBUG( - dbgs() - << "LV: Max legal vector width too small, scalable vectorization " - "unfeasible. Using fixed-width vectorization instead.\n"); - ORE->emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible", - TheLoop->getStartLoc(), - TheLoop->getHeader()) - << "Max legal vector width too small, scalable vectorization " - << "unfeasible. 
Using fixed-width vectorization instead."; - }); - return computeFeasibleMaxVF( - ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue())); - } - } - - LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n"); - - if (ElementCount::isKnownLE(UserVF, MaxSafeVF)) - return UserVF; - - LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF - << " is unsafe, clamping to max safe VF=" << MaxSafeVF - << ".\n"); - ORE->emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor", - TheLoop->getStartLoc(), - TheLoop->getHeader()) - << "User-specified vectorization factor " - << ore::NV("UserVectorizationFactor", UserVF) - << " is unsafe, clamping to maximum safe vectorization factor " - << ore::NV("VectorizationFactor", MaxSafeVF); - }); - return MaxSafeVF; - } - + unsigned WidestRegister = TTI.getRegisterBitWidth(true); WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits); // Ensure MaxVF is a power of 2; the dependence distance bound may not be.