diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -61,6 +61,7 @@
   unsigned XLen = 32;
   MVT XLenVT = MVT::i32;
   uint8_t MaxInterleaveFactor = 2;
+  unsigned VScaleForTuning = 2;
   RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
   BitVector UserReservedRegister;
   RISCVFrameLowering FrameLowering;
@@ -162,6 +163,7 @@
   // implied by the architecture.
   unsigned getMaxRVVVectorSizeInBits() const;
   unsigned getMinRVVVectorSizeInBits() const;
+  unsigned getVScaleForTuning() const { return VScaleForTuning; }
   unsigned getMaxLMULForFixedLengthVectors() const;
   unsigned getMaxELENForFixedLengthVectors() const;
   bool useRVVForFixedLengthVectors() const;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -73,6 +73,10 @@
     llvm_unreachable("Unsupported register kind");
   }
 
+  Optional<unsigned> getVScaleForTuning() const {
+    return ST->getVScaleForTuning();
+  }
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP,
                                OptimizationRemarkEmitter *ORE);
@@ -90,6 +94,11 @@
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I);
 
+  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                   TTI::CastContextHint CCH,
+                                   TTI::TargetCostKind CostKind,
+                                   const Instruction *I = nullptr);
+
   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
     if (!ST->hasVInstructions())
       return false;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -163,6 +163,42 @@
   return NumLoads * MemOpCost;
 }
 
+InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                               Type *Src,
+                                               TTI::CastContextHint CCH,
+                                               TTI::TargetCostKind CostKind,
+                                               const Instruction *I) {
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  // TODO: Allow non-throughput costs that aren't binary.
+  auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost {
+    if (CostKind != TTI::TCK_RecipThroughput)
+      return Cost == 0 ? 0 : 1;
+    return Cost;
+  };
+
+  EVT SrcTy = TLI->getValueType(DL, Src);
+  EVT DstTy = TLI->getValueType(DL, Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return AdjustCost(
+        BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+
+  if (ISD == ISD::ZERO_EXTEND && SrcTy.isScalableVector() &&
+      DstTy.isScalableVector()) {
+    if (SrcTy.isInteger() && DstTy.isInteger() &&
+        SrcTy.getVectorElementCount() == DstTy.getVectorElementCount()) {
+      if (SrcTy.getScalarSizeInBits() <= DstTy.getScalarSizeInBits()) {
+        return 0;
+      }
+    }
+  }
+
+  return AdjustCost(
+      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+}
+
 void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                            TTI::UnrollingPreferences &UP,
                                            OptimizationRemarkEmitter *ORE) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vectorization.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vectorization.ll
@@ -0,0 +1,166 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=riscv64-unknown-elf -mattr=+experimental-v -riscv-v-vector-bits-min=128 -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
+; RUN: opt -mtriple=riscv64-unknown-elf -mattr=+experimental-v -riscv-v-vector-bits-min=128 -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
+; RUN: opt -mtriple=riscv64-unknown-elf -mattr=+experimental-v -riscv-v-vector-bits-min=128 -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
+
+; Test that the MaxVF for the following loop, that has no dependence distances,
+; is calculated as vscale x 2 (max legal RVV vector size) or vscale x 8
+; (maximized bandwidth for i8 in the loop).
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
+; CHECK: LV: Checking a loop in "test0"
+; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
+; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 8
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 8
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 4
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test that the MaxVF for the following loop, with a dependence distance
+; of 64 elements, is calculated as (maxvscale = 16) * 2.
+define void @test1(i32* %a, i8* %b) #0 {
+; CHECK: LV: Checking a loop in "test1"
+; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
+; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 4
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %2 = add nuw nsw i64 %iv, 64
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test that the MaxVF for the following loop, with a dependence distance
+; of 32 elements, is calculated as (maxvscale = 16) * 2.
+define void @test2(i32* %a, i8* %b) #0 {
+; CHECK: LV: Checking a loop in "test2"
+; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
+; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 4
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %2 = add nuw nsw i64 %iv, 32
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test that the MaxVF for the following loop, with a dependence distance
+; of 16 elements, is calculated as (maxvscale = 16) * 1.
+define void @test3(i32* %a, i8* %b) #0 {
+; CHECK: LV: Checking a loop in "test3"
+; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_ON: LV: Selecting VF: 4
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
+; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
+  %1 = load i8, i8* %arrayidx2, align 4
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %2 = add nuw nsw i64 %iv, 16
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test the fallback mechanism when scalable vectors are not feasible due
+; to e.g. dependence distance.
+define void @test4(i32* %a, i32* %b) #0 {
+; CHECK: LV: Checking a loop in "test4"
+; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_ON: LV: Selecting VF: 4
+; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
+; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %2 = add nuw nsw i64 %iv, 8
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+attributes #0 = { vscale_range(1, 16) }