Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -937,6 +937,11 @@
   ///  architectural maximum vector length, and None otherwise.
   Optional<unsigned> getMaxVScale() const;
 
+  /// \return The maximum number of bits in a single block of a scalable
+  /// vector register, i.e. the maximum size in bits of the <N x elt> part of
+  /// a <vscale x N x elt> vector. The default implementation returns 0.
+  unsigned getMaxScalableBitsPerBlock() const;
+
   /// \return True if the vectorization factor should be chosen to
   /// make the vector of the smallest element type match the size of a
   /// vector register. For wider element types, this could result in
@@ -1525,6 +1530,7 @@
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
   virtual Optional<unsigned> getMaxVScale() const = 0;
+  virtual unsigned getMaxScalableBitsPerBlock() const = 0;
   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
   virtual ElementCount getMinimumVF(unsigned ElemWidth,
                                     bool IsScalable) const = 0;
@@ -1953,6 +1959,9 @@
   Optional<unsigned> getMaxVScale() const override {
     return Impl.getMaxVScale();
   }
+  unsigned getMaxScalableBitsPerBlock() const override {
+    return Impl.getMaxScalableBitsPerBlock();
+  }
   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
     return Impl.shouldMaximizeVectorBandwidth(OptSize);
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -373,6 +373,8 @@
 
   Optional<unsigned> getMaxVScale() const { return None; }
 
+  unsigned getMaxScalableBitsPerBlock() const { return 0; }
+
   bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
 
   ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -109,7 +109,7 @@
   void emitRemarkWithHints() const;
 
   ElementCount getWidth() const {
-    return ElementCount::get(Width.Value, isScalable());
+    return ElementCount::get(Width.Value, isForcedScalable());
   }
   unsigned getInterleave() const { return Interleave.Value; }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
@@ -121,7 +121,13 @@
     return (ForceKind)Force.Value;
   }
 
-  bool isScalable() const { return Scalable.Value; }
+  /// \return True if the Scalable hint value is FK_Enabled, i.e. scalable
+  /// vectorization is forced for this loop.
+  bool isForcedScalable() const { return Scalable.Value == FK_Enabled; }
+
+  /// \return True unless the Scalable hint value is FK_Disabled; an
+  /// FK_Undefined hint therefore still permits scalable vectors.
+  bool allowScalable() const { return Scalable.Value != FK_Disabled; }
 
   /// If hints are provided that force vectorization, use the AlwaysPrint
   /// pass name to force the frontend to print the diagnostic.
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -633,6 +633,10 @@
   return TTIImpl->getMaxVScale();
 }
 
+unsigned TargetTransformInfo::getMaxScalableBitsPerBlock() const {
+  return TTIImpl->getMaxScalableBitsPerBlock();
+}
+
 bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
   return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
 }
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -121,6 +121,12 @@
     return BaseT::getMaxVScale();
   }
 
+  /// \return SVEBitsPerBlock with SVE, else the base implementation's value.
+  unsigned getMaxScalableBitsPerBlock() const {
+    return ST->hasSVE() ? AArch64::SVEBitsPerBlock
+                        : BaseT::getMaxScalableBitsPerBlock();
+  }
+
   unsigned getMaxInterleaveFactor(unsigned VF);
 
   unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -51,6 +51,21 @@
     cl::desc("The maximum number of SCEV checks allowed with a "
              "vectorize(enable) pragma"));
 
+// FIXME: Default to FK_Undefined once scalable vectorization is stable enough.
+static cl::opt<LoopVectorizeHints::ForceKind> ScalableVectorization(
+    "scalable-vectorization", cl::init(LoopVectorizeHints::FK_Disabled),
+    cl::Hidden,
+    cl::desc("Control whether the compiler can use scalable vectors to "
+             "vectorize a loop"),
+    cl::values(
+        clEnumValN(LoopVectorizeHints::FK_Disabled, "off",
+                   "disable all vectorization with scalable vectors"),
+        clEnumValN(LoopVectorizeHints::FK_Undefined, "on",
+                   "allow loops to be vectorized with scalable vectors"),
+        clEnumValN(LoopVectorizeHints::FK_Enabled, "always",
+                   "allow loops to be vectorized exclusively with "
+                   "scalable vectors")));
+
 /// Maximum vectorization interleave count.
 static const unsigned MaxInterleaveFactor = 16;
 
@@ -63,10 +78,10 @@
   case HK_UNROLL:
     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
   case HK_FORCE:
+  case HK_SCALABLE:
     return (Val <= 1);
   case HK_ISVECTORIZED:
   case HK_PREDICATE:
-  case HK_SCALABLE:
     return (Val == 0 || Val == 1);
   }
   return false;
@@ -80,8 +95,8 @@
       Force("vectorize.enable", FK_Undefined, HK_FORCE),
       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
-      Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
-      ORE(ORE) {
+      Scalable("vectorize.scalable.enable", ScalableVectorization, HK_SCALABLE),
+      TheLoop(L), ORE(ORE) {
   // Populate values with existing loop metadata.
   getHintsFromMetadata();
 
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1652,7 +1652,8 @@
   /// than zero. One is returned if vectorization should best be avoided due
   /// to cost.
   ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
-                                    unsigned SmallestType, unsigned WidestType);
+                                    unsigned SmallestType, unsigned WidestType,
+                                    bool ComputeMaxScalableVF);
 
   /// \return A pair with the clamped VF and the maximum safe VF.
   /// If MaxSafeElements allows it, ClampedVF is the SuggestedVF, otherwise
@@ -5571,10 +5572,22 @@
     unsigned SmallestType, WidestType;
     std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
 
+    // Query the max scalable VF; for now it is only reported in debug output.
+    if (Hints->allowScalable() && TTI.supportsScalableVectors()) {
+      ElementCount MaxScalableVF = computeFeasibleMaxVF(
+          TC, SmallestType, WidestType, /*ComputeMaxScalableVF=*/true);
+      if (MaxScalableVF.isScalable())
+        LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = "
+                          << MaxScalableVF << "\n");
+      else
+        LLVM_DEBUG(dbgs() << "LV: No feasible scalable VF found.\n");
+    }
+
     // First analyze the UserVF, fall back if the UserVF should be ignored.
     if (auto MaybeMaxVF = computeFeasibleUserVF(UserVF, WidestType))
       return MaybeMaxVF.getValue();
-    return computeFeasibleMaxVF(TC, SmallestType, WidestType);
+    return computeFeasibleMaxVF(TC, SmallestType, WidestType,
+                                /*ComputeMaxScalableVF=*/false);
   };
 
   switch (ScalarEpilogueStatus) {
@@ -5798,7 +5811,8 @@
 }
 
 ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(
-    unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType) {
+    unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType,
+    bool ComputeMaxScalableVF) {
   // Get the maximum safe dependence distance in bits computed by LAA.
   // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
   // the memory accesses that is most restrictive (involved in the smallest
@@ -5807,7 +5821,9 @@
   unsigned MaxSafeElements =
       PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
 
-  unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+  unsigned WidestRegister = ComputeMaxScalableVF
+                                ? TTI.getMaxScalableBitsPerBlock()
+                                : TTI.getRegisterBitWidth(true);
 
   LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
                     << " / " << WidestType << " bits.\n");
@@ -5816,8 +5832,8 @@
 
   // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
   // Note that both WidestRegister and WidestType may not be a powers of 2.
-  auto MaxVectorSize =
-      ElementCount::getFixed(PowerOf2Floor(WidestRegister / WidestType));
+  auto MaxVectorSize = ElementCount::get(
+      PowerOf2Floor(WidestRegister / WidestType), ComputeMaxScalableVF);
   std::tie(MaxVectorSize, std::ignore) =
       clampFeasibleMaxVF(MaxVectorSize, MaxSafeElements);
 
@@ -5840,8 +5856,8 @@
   ElementCount MaxVF = MaxVectorSize;
   if (TTI.shouldMaximizeVectorBandwidth(!isScalarEpilogueAllowed()) ||
       (MaximizeBandwidth && isScalarEpilogueAllowed())) {
-    auto MaxVectorSizeMaxBW =
-        ElementCount::getFixed(PowerOf2Floor(WidestRegister / SmallestType));
+    auto MaxVectorSizeMaxBW = ElementCount::get(
+        PowerOf2Floor(WidestRegister / SmallestType), ComputeMaxScalableVF);
     std::tie(MaxVectorSizeMaxBW, std::ignore) =
         clampFeasibleMaxVF(MaxVectorSizeMaxBW, MaxSafeElements);
 
@@ -5870,7 +5886,7 @@
       }
     }
     if (ElementCount MinVF =
-            TTI.getMinimumVF(SmallestType, /*IsScalable=*/false)) {
+            TTI.getMinimumVF(SmallestType, ComputeMaxScalableVF)) {
       if (ElementCount::isKnownLT(MaxVF, MinVF)) {
         LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
                           << ") with target's minimum: " << MinVF << '\n');
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
@@ -0,0 +1,124 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -scalable-vectorization=on -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_ON
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -scalable-vectorization=always -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_ALWAYSON
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -scalable-vectorization=off -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_DISABLED
+; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -scalable-vectorization=on -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_MAXBW
+
+; Test that the MaxVF for the following loop, which has no dependence
+; distances, is calculated as vscale x 4 (max legal SVE vector size) or
+; vscale x 16 (maximized bandwidth for i8 in the loop).
+define void @test0(i32* %a, i8* %b, i32* %c) {
+; CHECK: LV: Checking a loop in "test0"
+; CHECK_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 16
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 1
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+; Test that the MaxVF for the following loop, with a dependence distance
+; of 64 elements, is calculated as vscale x 4, i.e. 64 / (maxvscale = 16).
+define void @test1(i32* %a, i8* %b) {
+; CHECK: LV: Checking a loop in "test1"
+; CHECK_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 4
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 1
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %2 = add nuw nsw i64 %iv, 64
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test that the MaxVF for the following loop, with a dependence distance
+; of 32 elements, is calculated as vscale x 2, i.e. 32 / (maxvscale = 16).
+define void @test2(i32* %a, i8* %b) {
+; CHECK: LV: Checking a loop in "test2"
+; CHECK_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 2
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 1
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %2 = add nuw nsw i64 %iv, 32
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test that the MaxVF for the following loop, with a dependence distance
+; of 16 elements, is calculated as vscale x 1, i.e. 16 / (maxvscale = 16).
+define void @test3(i32* %a, i8* %b) {
+; CHECK: LV: Checking a loop in "test3"
+; CHECK_ON: LV: Found feasible scalable VF = vscale x 1
+; CHECK_ALWAYSON: LV: Found feasible scalable VF = vscale x 1
+; CHECK_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_MAXBW: LV: Found feasible scalable VF = vscale x 1
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 1
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %2 = add nuw nsw i64 %iv, 16
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}