diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -96,6 +96,7 @@ SMIN_PRED, SRA_PRED, SRL_PRED, + SRAD_PRED, SUB_PRED, UDIV_PRED, UMAX_PRED, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1941,6 +1941,7 @@ MAKE_CASE(AArch64ISD::SMIN_PRED) MAKE_CASE(AArch64ISD::SRA_PRED) MAKE_CASE(AArch64ISD::SRL_PRED) + MAKE_CASE(AArch64ISD::SRAD_PRED) MAKE_CASE(AArch64ISD::SUB_PRED) MAKE_CASE(AArch64ISD::UDIV_PRED) MAKE_CASE(AArch64ISD::UMAX_PRED) @@ -2305,6 +2306,10 @@ static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG); +static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, + EVT VT); +static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, + EVT VT); /// isZerosVector - Check whether SDNode N is a zero-filled vector. 
static bool isZerosVector(const SDNode *N) { @@ -12930,31 +12935,61 @@ if (isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV - // fold (sdiv X, pow2) EVT VT = N->getValueType(0); - if ((VT != MVT::i32 && VT != MVT::i64) || + bool IsFoldableVector = + Subtarget->hasSVE() && + (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, true)); + bool IsFoldableScalar = VT == MVT::i32 || VT == MVT::i64; + + // fold (sdiv X, pow2) + if ((!IsFoldableVector && !IsFoldableScalar) || !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); unsigned Lg2 = Divisor.countTrailingZeros(); - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT); - // Add (N0 < 0) ? Pow2 - 1 : 0; - SDValue CCVal; - SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL); - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); - SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp); + SDValue SRA; + + if (IsFoldableVector && VT.isFixedLengthVector()) { + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + auto Op = convertToScalableVector(DAG, ContainerVT, N0); + + auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT); + auto ScalableSRA = + DAG.getNode(AArch64ISD::SRAD_PRED, DL, ContainerVT, Pg, Op, + DAG.getTargetConstant(Lg2, DL, MVT::i32)); - Created.push_back(Cmp.getNode()); - Created.push_back(Add.getNode()); - Created.push_back(CSel.getNode()); + SRA = convertFromScalableVector(DAG, VT, ScalableSRA); - // Divide by pow2. 
- SDValue SRA = - DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64)); + Created.push_back(Op.getNode()); + Created.push_back(Pg.getNode()); + Created.push_back(ScalableSRA.getNode()); + } else if (IsFoldableVector) { + SDValue Pg = getPredicateForScalableVector(DAG, DL, VT); + SRA = DAG.getNode(AArch64ISD::SRAD_PRED, DL, VT, Pg, N0, + DAG.getTargetConstant(Lg2, DL, MVT::i32)); + + Created.push_back(Pg.getNode()); + } else { + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT); + + // Add (N0 < 0) ? Pow2 - 1 : 0; + SDValue CCVal; + SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); + SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp); + + Created.push_back(Cmp.getNode()); + Created.push_back(Add.getNode()); + Created.push_back(CSel.getNode()); + + // Divide by pow2. + SRA = + DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64)); + } // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. 
@@ -14994,6 +15029,9 @@
     return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
   case Intrinsic::aarch64_sve_asr:
     return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
+  case Intrinsic::aarch64_sve_asrd:
+    return DAG.getNode(AArch64ISD::SRAD_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_fadd:
     return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
   case Intrinsic::aarch64_sve_fsub:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -275,6 +275,13 @@
 def AArch64bic : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
 
+def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
+  SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
+]>;
+
+def AArch64asrd_p : SDNode<"AArch64ISD::SRAD_PRED", SDT_AArch64Arith_Imm>;
+
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
   def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -1575,7 +1582,7 @@
   defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr",  "ASR_ZPZI",  int_aarch64_sve_asr>;
   defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr",  "LSR_ZPZI",  int_aarch64_sve_lsr>;
   defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl",  "LSL_ZPZI",  int_aarch64_sve_lsl>;
-  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<    0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<    0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_p>;
 
   defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
   defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
@@ -1586,7 +1593,7 @@
   defm ASR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
   defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
   defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
-  defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+  defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_p>;
 } // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
 
 let Predicates = [HasSVEorStreamingSVE] in {
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -0,0 +1,340 @@
+; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc 
-aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 { +; CHECK-LABEL: sdiv_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer) + ret <8 x i8> %res +} + +define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 { +; CHECK-LABEL: sdiv_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer) + ret <16 x i8> %res +} + +define void @sdiv_v32i8(<32 x i8>* %a) #0 { +; CHECK-LABEL: sdiv_v32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl32 +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 +; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <32 x i8>, <32 x i8>* %a + %res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer) + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @sdiv_v64i8(<64 x i8>* %a) #0 { +; VBITS_GE_512-LABEL: sdiv_v64i8: +; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.b, vl64 +; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0] +; VBITS_GE_512-NEXT: asrd z0.b, p0/m, z0.b, #5 +; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0] +; VBITS_GE_512-NEXT: ret + %op1 = load <64 x i8>, <64 x i8>* %a + %res = sdiv <64 x i8> %op1, 
shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer) + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @sdiv_v128i8(<128 x i8>* %a) #0 { +; VBITS_GE_1024-LABEL: sdiv_v128i8: +; VBITS_GE_1024: // %bb.0: +; VBITS_GE_1024-NEXT: ptrue p0.b, vl128 +; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0] +; VBITS_GE_1024-NEXT: asrd z0.b, p0/m, z0.b, #5 +; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <128 x i8>, <128 x i8>* %a + %res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer) + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @sdiv_v256i8(<256 x i8>* %a) #0 { +; VBITS_GE_2048-LABEL: sdiv_v256i8: +; VBITS_GE_2048: // %bb.0: +; VBITS_GE_2048-NEXT: ptrue p0.b, vl256 +; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0] +; VBITS_GE_2048-NEXT: asrd z0.b, p0/m, z0.b, #5 +; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <256 x i8>, <256 x i8>* %a + %res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer) + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 { +; CHECK-LABEL: sdiv_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer) + ret <4 x i16> %res +} + +define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 { +; CHECK-LABEL: sdiv_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: asrd 
z0.h, p0/m, z0.h, #5 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer) + ret <8 x i16> %res +} + +define void @sdiv_v16i16(<16 x i16>* %a) #0 { +; CHECK-LABEL: sdiv_v16i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <16 x i16>, <16 x i16>* %a + %res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer) + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @sdiv_v32i16(<32 x i16>* %a) #0 { +; VBITS_GE_512-LABEL: sdiv_v32i16: +; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.h, vl32 +; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] +; VBITS_GE_512-NEXT: asrd z0.h, p0/m, z0.h, #5 +; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_GE_512-NEXT: ret + %op1 = load <32 x i16>, <32 x i16>* %a + %res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer) + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @sdiv_v64i16(<64 x i16>* %a) #0 { +; VBITS_GE_1024-LABEL: sdiv_v64i16: +; VBITS_GE_1024: // %bb.0: +; VBITS_GE_1024-NEXT: ptrue p0.h, vl64 +; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] +; VBITS_GE_1024-NEXT: asrd z0.h, p0/m, z0.h, #5 +; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <64 x i16>, <64 x i16>* %a + %res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer) + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @sdiv_v128i16(<128 x i16>* %a) #0 { +; 
VBITS_GE_2048-LABEL: sdiv_v128i16: +; VBITS_GE_2048: // %bb.0: +; VBITS_GE_2048-NEXT: ptrue p0.h, vl128 +; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] +; VBITS_GE_2048-NEXT: asrd z0.h, p0/m, z0.h, #5 +; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <128 x i16>, <128 x i16>* %a + %res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer) + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 { +; CHECK-LABEL: sdiv_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer) + ret <2 x i32> %res +} + +define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 { +; CHECK-LABEL: sdiv_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer) + ret <4 x i32> %res +} + +define void @sdiv_v8i32(<8 x i32>* %a) #0 { +; CHECK-LABEL: sdiv_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <8 x i32>, <8 x i32>* %a + %res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer) + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void 
@sdiv_v16i32(<16 x i32>* %a) #0 { +; VBITS_GE_512-LABEL: sdiv_v16i32: +; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] +; VBITS_GE_512-NEXT: asrd z0.s, p0/m, z0.s, #5 +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_512-NEXT: ret + %op1 = load <16 x i32>, <16 x i32>* %a + %res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer) + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @sdiv_v32i32(<32 x i32>* %a) #0 { +; VBITS_GE_1024-LABEL: sdiv_v32i32: +; VBITS_GE_1024: // %bb.0: +; VBITS_GE_1024-NEXT: ptrue p0.s, vl32 +; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0] +; VBITS_GE_1024-NEXT: asrd z0.s, p0/m, z0.s, #5 +; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <32 x i32>, <32 x i32>* %a + %res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer) + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @sdiv_v64i32(<64 x i32>* %a) #0 { +; VBITS_GE_2048-LABEL: sdiv_v64i32: +; VBITS_GE_2048: // %bb.0: +; VBITS_GE_2048-NEXT: ptrue p0.s, vl64 +; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] +; VBITS_GE_2048-NEXT: asrd z0.s, p0/m, z0.s, #5 +; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <64 x i32>, <64 x i32>* %a + %res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer) + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 { +; CHECK-LABEL: sdiv_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret 
+ %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer) + ret <1 x i64> %res +} + +; Vector i64 sdiv are not legal for NEON so use SVE when available. +define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 { +; CHECK-LABEL: sdiv_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer) + ret <2 x i64> %res +} + +define void @sdiv_v4i64(<4 x i64>* %a) #0 { +; CHECK-LABEL: sdiv_v4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <4 x i64>, <4 x i64>* %a + %res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer) + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @sdiv_v8i64(<8 x i64>* %a) #0 { +; VBITS_GE_512-LABEL: sdiv_v8i64: +; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.d, vl8 +; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] +; VBITS_GE_512-NEXT: asrd z0.d, p0/m, z0.d, #5 +; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0] +; VBITS_GE_512-NEXT: ret + %op1 = load <8 x i64>, <8 x i64>* %a + %res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer) + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @sdiv_v16i64(<16 x i64>* %a) #0 { +; VBITS_GE_1024-LABEL: sdiv_v16i64: +; VBITS_GE_1024: // %bb.0: +; VBITS_GE_1024-NEXT: ptrue p0.d, vl16 +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0] +; VBITS_GE_1024-NEXT: 
asrd z0.d, p0/m, z0.d, #5
+; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x0]
+; VBITS_GE_1024-NEXT:    ret
+  %op1 = load <16 x i64>, <16 x i64>* %a
+  %res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
+  store <16 x i64> %res, <16 x i64>* %a
+  ret void
+}
+
+define void @sdiv_v32i64(<32 x i64>* %a) #0 {
+; VBITS_GE_2048-LABEL: sdiv_v32i64:
+; VBITS_GE_2048:       // %bb.0:
+; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
+; VBITS_GE_2048-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; VBITS_GE_2048-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x0]
+; VBITS_GE_2048-NEXT:    ret
+  %op1 = load <32 x i64>, <32 x i64>* %a
+  %res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
+  store <32 x i64> %res, <32 x i64>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sdiv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #4
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 16, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @sdiv_i8_neg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sdiv_i8_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #6
+; CHECK-NEXT:    subr z0.b, z0.b, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -64, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sdiv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #10
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1024, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @sdiv_i16_neg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sdiv_i16_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #12
+; CHECK-NEXT:    subr z0.h, z0.h, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -4096, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: sdiv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #23
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: sdiv_i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #25
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -33554432, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: sdiv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #53
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9007199254740992, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: sdiv_i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #55
+; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -36028797018963968, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
+attributes #0 = { "target-features"="+sve" }