Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1637,6 +1637,13 @@
   ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
                                     unsigned SmallestType, unsigned WidestType);
 
+  /// \return {ClampedVF, MaxSafeVF}. ClampedVF is SuggestedVF when it is known
+  /// to be <= MaxSafeVF, otherwise MaxSafeVF. For a scalable SuggestedVF, both
+  /// fall back to fixed-width VFs when MaxSafeElements / max(vscale) is zero
+  /// or the target does not report a maximum vscale.
+  std::pair<ElementCount, ElementCount>
+  clampFeasibleMaxVF(ElementCount SuggestedVF, unsigned MaxSafeElements);
+
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
   /// operate on
@@ -5643,6 +5650,30 @@
   return None;
 }
 
+std::pair<ElementCount, ElementCount>
+LoopVectorizationCostModel::clampFeasibleMaxVF(ElementCount SuggestedVF,
+                                               unsigned MaxSafeElements) {
+  ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
+  if (SuggestedVF.isScalable()) {
+    Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+    // Express the safe bound as a scalable VF by dividing it by the maximum
+    // vscale; an unknown maximum vscale yields a zero (unusable) bound.
+    MaxSafeVF = ElementCount::getScalable(
+        MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
+    // No scalable VF fits within the bound: retry with a fixed-width VF that
+    // has the same known minimum element count.
+    if (MaxSafeVF.isZero())
+      return clampFeasibleMaxVF(
+          ElementCount::getFixed(SuggestedVF.getKnownMinValue()),
+          MaxSafeElements);
+  }
+  // Keep SuggestedVF when it is known not to exceed the safe bound.
+  if (ElementCount::isKnownLE(SuggestedVF, MaxSafeVF))
+    return {SuggestedVF, MaxSafeVF};
+  // Otherwise clamp the result to MaxSafeVF.
+  return {MaxSafeVF, MaxSafeVF};
+}
+
 Optional<ElementCount>
 LoopVectorizationCostModel::computeFeasibleUserVF(ElementCount UserVF,
                                                   unsigned WidestType) {
@@ -5672,51 +5703,39 @@
   if (Legal->isSafeForAnyVectorWidth())
     return UserVF;
 
-  ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
-
-  if (UserVF.isScalable()) {
-    Optional<unsigned> MaxVScale = TTI.getMaxVScale();
-
-    // Scale VF by vscale before checking if it's safe.
-    MaxSafeVF = ElementCount::getScalable(
-        MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
-
-    if (MaxSafeVF.isZero()) {
-      // The dependence distance is too small to use scalable vectors,
-      // fallback on fixed.
-      LLVM_DEBUG(
-          dbgs()
-          << "LV: Max legal vector width too small, scalable vectorization "
-             "unfeasible. Using fixed-width vectorization instead.\n");
-      ORE->emit([&]() {
-        return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
-                                          TheLoop->getStartLoc(),
-                                          TheLoop->getHeader())
-               << "Max legal vector width too small, scalable vectorization "
-               << "unfeasible. Using fixed-width vectorization instead.";
-      });
-      return computeFeasibleUserVF(
-          ElementCount::getFixed(UserVF.getKnownMinValue()), WidestType);
-    }
-  }
+  // If the user vectorization factor is legally unsafe, clamp it to a safe
+  // value. Otherwise, return as is.
+  ElementCount NewVF = ElementCount::getNull(), MaxSafeVF = NewVF;
+  std::tie(NewVF, MaxSafeVF) = clampFeasibleMaxVF(UserVF, MaxSafeElements);
 
+  // Emit some useful debug output / opt remarks if the user value is clamped.
   LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
-  if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
-    return UserVF;
-
-  LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
-                    << " is unsafe, clamping to max safe VF=" << MaxSafeVF
-                    << ".\n");
-  ORE->emit([&]() {
-    return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
-                                      TheLoop->getStartLoc(),
-                                      TheLoop->getHeader())
-           << "User-specified vectorization factor "
-           << ore::NV("UserVectorizationFactor", UserVF)
-           << " is unsafe, clamping to maximum safe vectorization factor "
-           << ore::NV("VectorizationFactor", MaxSafeVF);
-  });
-  return MaxSafeVF;
+  if (UserVF.isScalable() != NewVF.isScalable()) {
+    auto Diag = "Max legal vector width too small, scalable vectorization "
+                "unfeasible. Using fixed-width vectorization instead.";
+    LLVM_DEBUG(dbgs() << "LV: " << Diag << "\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
+                                        TheLoop->getStartLoc(),
+                                        TheLoop->getHeader())
+             << Diag;
+    });
+  }
+  if (NewVF.getKnownMinValue() != UserVF.getKnownMinValue()) {
+    LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+                      << " is unsafe, clamping to max safe VF=" << NewVF
+                      << ".\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+                                        TheLoop->getStartLoc(),
+                                        TheLoop->getHeader())
+             << "User-specified vectorization factor "
+             << ore::NV("UserVectorizationFactor", UserVF)
+             << " is unsafe, clamping to maximum safe vectorization factor "
+             << ore::NV("VectorizationFactor", NewVF);
+    });
+  }
+  return NewVF;
 }
 
 ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(
@@ -5726,43 +5745,50 @@
   // the memory accesses that is most restrictive (involved in the smallest
   // dependence distance).
   unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
+  unsigned MaxSafeElements =
+      PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
 
   unsigned WidestRegister = TTI.getRegisterBitWidth(true);
-  WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits);
-
-  // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
-  // Note that both WidestRegister and WidestType may not be a powers of 2.
-  auto MaxVectorSize =
-      ElementCount::getFixed(PowerOf2Floor(WidestRegister / WidestType));
 
   LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
                     << " / " << WidestType << " bits.\n");
   LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
                     << WidestRegister << " bits.\n");
 
-  assert(MaxVectorSize.getFixedValue() <= WidestRegister &&
-         "Did not expect to pack so many elements"
-         " into one vector!");
-  if (MaxVectorSize.getFixedValue() == 0) {
+  // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
+  // Note that both WidestRegister and WidestType may not be powers of 2.
+  auto MaxVectorSize =
+      ElementCount::getFixed(PowerOf2Floor(WidestRegister / WidestType));
+  std::tie(MaxVectorSize, std::ignore) =
+      clampFeasibleMaxVF(MaxVectorSize, MaxSafeElements);
+
+  if (MaxVectorSize.getKnownMinValue() == 0) {
     LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n");
     return ElementCount::getFixed(1);
-  } else if (ConstTripCount && ConstTripCount < MaxVectorSize.getFixedValue() &&
-             isPowerOf2_32(ConstTripCount)) {
+  } else if (ConstTripCount && isPowerOf2_32(ConstTripCount)) {
     // We need to clamp the VF to be the ConstTripCount. There is no point in
     // choosing a higher viable VF as done in the loop below.
-    LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
-                      << ConstTripCount << "\n");
-    return ElementCount::getFixed(ConstTripCount);
+    ElementCount ClampedVF = MaxVectorSize;
+    std::tie(ClampedVF, std::ignore) =
+        clampFeasibleMaxVF(MaxVectorSize, ConstTripCount);
+    if (ClampedVF != MaxVectorSize) {
+      LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
+                        << ConstTripCount << "\n");
+      return ClampedVF;
+    }
   }
 
   ElementCount MaxVF = MaxVectorSize;
   if (TTI.shouldMaximizeVectorBandwidth(!isScalarEpilogueAllowed()) ||
       (MaximizeBandwidth && isScalarEpilogueAllowed())) {
+    auto MaxVectorSizeMaxBW =
+        ElementCount::getFixed(PowerOf2Floor(WidestRegister / SmallestType));
+    std::tie(MaxVectorSizeMaxBW, std::ignore) =
+        clampFeasibleMaxVF(MaxVectorSizeMaxBW, MaxSafeElements);
+
     // Collect all viable vectorization factors larger than the default MaxVF
     // (i.e. MaxVectorSize).
     SmallVector<ElementCount, 8> VFs;
-    auto MaxVectorSizeMaxBW =
-        ElementCount::getFixed(WidestRegister / SmallestType);
     for (ElementCount VS = MaxVectorSize * 2;
          ElementCount::isKnownLE(VS, MaxVectorSizeMaxBW); VS *= 2)
       VFs.push_back(VS);
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -37,9 +37,9 @@
 ; unless max(vscale)=2 it's unsafe to vectorize. For SVE max(vscale)=16, check
 ; fixed-width vectorization is used instead.
 
+; CHECK-DBG: LV: The max safe VF is: 8.
 ; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
 ; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
-; CHECK-DBG: LV: The max safe VF is: 8.
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test1
 ; CHECK: <4 x i32>
@@ -80,9 +80,9 @@
 ;   }
 ; }
 
-; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
 ; CHECK-DBG: LV: The max safe VF is: 4.
-; CHECK-DBG: LV: User VF=8 is unsafe, clamping to max safe VF=4.
+; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
+; CHECK-DBG: LV: User VF=vscale x 8 is unsafe, clamping to max safe VF=4.
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test2
 ; CHECK: <4 x i32>
@@ -337,8 +337,8 @@
 ; supported but max vscale is undefined.
 ;
 ; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale"
-; CHECK-NO-MAX-VSCALE: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
-; CEHCK-NO-MAX-VSCALE: The max safe VF is: 4.
+; CHECK-NO-MAX-VSCALE: The max safe VF is: 4.
+; CHECK-NO-MAX-VSCALE: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
 ; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4.
 ; CHECK-NO-MAX-VSCALE: <4 x i32>
 define void @test_no_max_vscale(i32* %a, i32* %b) {