diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2530,6 +2530,14 @@
     return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name);
   }
 
+  Value *CreateVectorReverse(Value *V, const Twine &Name = "") {
+    auto *Ty = cast<VectorType>(V->getType());
+    Module *M = BB->getParent()->getParent();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, Ty);
+    return Insert(CallInst::Create(F, V), Name);
+  }
+
   Value *CreateExtractValue(Value *Agg, ArrayRef<unsigned> Idxs,
                             const Twine &Name = "") {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -448,6 +448,13 @@
                                                  SrcTy.getSimpleVT()))
     return AdjustCost(Entry->Cost);
 
+  // Avoid calling into BasicTTIImpl for scalable vectors, which would
+  // assert. At the moment the cost in BasicTTIImpl is 0 for the ISD
+  // nodes that accept scalable vectors, but not all of the relevant
+  // ISD nodes accept scalable vectors yet.
+  if (isa<ScalableVectorType>(Src))
+    return 0;
+
   return AdjustCost(
       BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
 }
@@ -1178,5 +1185,12 @@
     return LT.first * Entry->Cost;
   }
 
+  // Scalable vector types use the new experimental.vector.reverse intrinsic
+  // for TTI::SK_Reverse; fixed vector types keep using a shuffle vector.
+  if (Kind == TTI::SK_Reverse && isa<ScalableVectorType>(Tp)) {
+    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+    return LT.first + 1;
+  }
+
   return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2504,7 +2504,12 @@
 Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
   assert(Vec->getType()->isVectorTy() && "Invalid type");
-  assert(!VF.isScalable() && "Cannot reverse scalable vectors");
+
+  // Call the intrinsic experimental.vector.reverse for scalable vectors.
+  if (VF.isScalable())
+    return Builder.CreateVectorReverse(Vec, "reverse");
+
+  // Keep the original shuffle-based behaviour for fixed vectors.
   SmallVector<int, 8> ShuffleMask;
   for (unsigned i = 0; i < VF.getKnownMinValue(); ++i)
     ShuffleMask.push_back(VF.getKnownMinValue() - i - 1);
@@ -2792,16 +2797,16 @@
       InBounds = gep->isInBounds();
 
     if (Reverse) {
-      assert(!VF.isScalable() &&
-             "Reversing vectors is not yet supported for scalable vectors.");
-
+      unsigned NumElts = VF.isScalable()
+                             ? VectorLoopValueMap.getNumCachedLanes()
+                             : VF.getKnownMinValue();
       // If the address is consecutive but reversed, then the
       // wide store needs to start at the last vector element.
       PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
-          ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.getKnownMinValue())));
+          ScalarDataTy, Ptr, Builder.getInt32(-Part * NumElts)));
       PartPtr->setIsInBounds(InBounds);
       PartPtr = cast<GetElementPtrInst>(Builder.CreateGEP(
-          ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.getKnownMinValue())));
+          ScalarDataTy, PartPtr, Builder.getInt32(1 - NumElts)));
       PartPtr->setIsInBounds(InBounds);
       if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
         BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
@@ -7315,8 +7320,6 @@
           smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
     }
   }
-
-  assert(!VF.isScalable() && "VF is assumed to be non scalable");
   unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
   return N *
          TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -0,0 +1,104 @@
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; This is the loop in C++ being vectorized in this file with
+; experimental.vector.reverse:
+;   #pragma clang loop vectorize_width(8, scalable)
+;   for (int i = N-1; i >= 0; --i)
+;     a[i] = b[i] + 1.0;
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @vecReverse_vla(i32 %N, double* nocapture %a, double* nocapture readonly %b) #0 {
+; CHECK-LABEL: @vecReverse_vla
+; CHECK: %[[WIDE:.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* %{{.*}}, align 8
+; CHECK-NEXT: %[[REVERSE:.*]] = call <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double> %[[WIDE]])
+; CHECK-NEXT: %[[FADD:.*]] = fadd <vscale x 8 x double> %[[REVERSE]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: %{{.*}} = getelementptr inbounds double, double* %{{.*}}, i64 -8
+; CHECK-NEXT: %[[REVERSE6:.*]] = call <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double> %[[FADD]])
+; CHECK-NEXT: %{{.*}} = getelementptr inbounds double, double* %{{.*}}, i64 %{{.*}}
+; CHECK-NEXT: %[[CAST:.*]] = bitcast double* %{{.*}} to <vscale x 8 x double>*
+; CHECK-NEXT: store <vscale x 8 x double> %[[REVERSE6]], <vscale x 8 x double>* %[[CAST]], align 8
+; CHECK-NEXT: %{{.*}} = call i64 @llvm.vscale.i64()
+
+entry:
+  %cmp8 = icmp sgt i32 %N, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup.loopexit
+
+for.body.preheader:                               ; preds = %entry
+  %0 = zext i32 %N to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv.next
+  %1 = load double, double* %arrayidx, align 8
+  %add = fadd double %1, 1.000000e+00
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv.next
+  store double %add, double* %arrayidx2, align 8
+  %cmp = icmp sgt i64 %indvars.iv, 1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable mustprogress
+define void @vecReverse_i64(i32 %N, i64* nocapture %a, i64* nocapture readonly %b) #0 {
+; CHECK-LABEL: @vecReverse_i64
+; CHECK: %[[WIDE:.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* %{{.*}}, align 8
+; CHECK-NEXT: %[[REVERSE:.*]] = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> %[[WIDE]])
+; CHECK-NEXT: %[[CAST:.*]] = sitofp <vscale x 8 x i64> %[[REVERSE]] to <vscale x 8 x double>
+; CHECK-NEXT: %{{.*}} = fadd <vscale x 8 x double> %[[CAST]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: %[[CAST2:.*]] = fptosi <vscale x 8 x double> %{{.*}} to <vscale x 8 x i64>
+; CHECK-NEXT: %[[GETPTR:.*]] = getelementptr inbounds i64, i64* %{{.*}}, i64 -8
+; CHECK-NEXT: %[[REVERSE6:.*]] = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> %[[CAST2]])
+; CHECK-NEXT: %{{.*}} = getelementptr inbounds i64, i64* %[[GETPTR]], i64 %{{.*}}
+; CHECK-NEXT: %[[BITCAST:.*]] = bitcast i64* %{{.*}} to <vscale x 8 x i64>*
+; CHECK-NEXT: store <vscale x 8 x i64> %[[REVERSE6]], <vscale x 8 x i64>* %[[BITCAST]], align 8
+; CHECK-NEXT: %{{.*}} = call i64 @llvm.vscale.i64()
+
+entry:
+  %cmp9 = icmp sgt i32 %N, 0
+  br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup.loopexit
+
+for.body.preheader:                               ; preds = %entry
+  %0 = zext i32 %N to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv.next
+  %1 = load i64, i64* %arrayidx, align 8
+  %conv = sitofp i64 %1 to double
+  %add = fadd double %conv, 1.000000e+00
+  %conv1 = fptosi double %add to i64
+  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 %indvars.iv.next
+  store i64 %conv1, i64* %arrayidx3, align 8
+  %cmp = icmp sgt i64 %indvars.iv, 1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+neon,+sve" }
+
+!0 = distinct !{!0, !1, !2, !3, !4}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.vectorize.width", i32 8}
+!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
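
For reference, a minimal IR sketch of the two lowerings reverseVector produces with this change, assuming VF = vscale x 8 for the scalable case and VF = 4 for the fixed case; the function names below are illustrative only and are not taken from the patch. Scalable VFs call the new experimental.vector.reverse intrinsic, while fixed VFs keep the shufflevector with a descending-index mask:

; Sketch: scalable reverse goes through the intrinsic emitted by CreateVectorReverse.
define <vscale x 8 x double> @reverse_scalable(<vscale x 8 x double> %v) {
  %reverse = call <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double> %v)
  ret <vscale x 8 x double> %reverse
}

; Sketch: fixed-width reverse keeps the shuffle mask <3, 2, 1, 0> built by reverseVector.
define <4 x double> @reverse_fixed(<4 x double> %v) {
  %reverse = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %reverse
}

declare <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double>)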