diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10963,6 +10963,28 @@
   return SDValue();
 }
 
+static bool isPow2Splat(SDValue Op, int64_t &SplatVal, bool &Negated) {
+  if (Op.getOpcode() != AArch64ISD::DUP &&
+      Op.getOpcode() != ISD::SPLAT_VECTOR)
+    return false;
+
+  if (!isa<ConstantSDNode>(Op->getOperand(0)))
+    return false;
+
+  SplatVal = Op->getConstantOperandVal(0);
+  if (Op.getValueType().getVectorElementType() != MVT::i64)
+    SplatVal = (int32_t)SplatVal;
+
+  Negated = false;
+  if (isPowerOf2_64(SplatVal))
+    return true;
+
+  Negated = true;
+  if (isPowerOf2_64(-SplatVal))
+    return true;
+
+  return false;
+}
+
 SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
@@ -10977,6 +10999,11 @@
   if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
     return LowerToPredicatedOp(Op, DAG, PredOpcode);
 
+  bool Negated;
+  int64_t SplatVal;
+  if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated))
+    return LowerToPredicatedOp(Op, DAG, PredOpcode);
+
   // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
   // operations, and truncate the result.
   EVT WidenedVT;
@@ -12961,13 +12988,42 @@
   if (isIntDivCheap(N->getValueType(0), Attr))
     return SDValue(N,0); // Lower SDIV as SDIV
 
-  // fold (sdiv X, pow2)
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // FIXME: We must handle NEON-sized vectors here, since the NEON mov (imm)
+  // expansion gets in the way. Ideally that expansion should be pushed into
+  // instruction selection using splat_vector, at which point this code can
+  // be removed.
+  if (Subtarget->hasSVE() && (VT.is128BitVector() || VT.is64BitVector())) {
+    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+    auto ConstType =
+        VT.getVectorElementType() == MVT::i64 ? MVT::i64 : MVT::i32;
+    auto Op = convertToScalableVector(DAG, ContainerVT, N->getOperand(0));
+    auto Splat = DAG.getSplatVector(
+        ContainerVT, DL,
+        DAG.getTargetConstant(Divisor.getSExtValue(), DL, ConstType));
+    auto SDiv = DAG.getNode(ISD::SDIV, DL, ContainerVT, Op, Splat);
+    auto Result = convertFromScalableVector(DAG, VT, SDiv);
+
+    Created.push_back(Op.getNode());
+    Created.push_back(Splat.getNode());
+    Created.push_back(SDiv.getNode());
+
+    return Result;
+  }
+
+  // For scalable vectors and fixed vectors wider than NEON, mark the divide
+  // as cheap so we can handle it much later. This allows us to handle larger
+  // than legal fixed types, as well as sdiv intrinsics.
+  if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+    return SDValue(N, 0);
+
+  // fold (sdiv X, pow2)
   if ((VT != MVT::i32 && VT != MVT::i64) ||
       !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
     return SDValue();
 
-  SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
   unsigned Lg2 = Divisor.countTrailingZeros();
   SDValue Zero = DAG.getConstant(0, DL, VT);
@@ -13519,6 +13575,40 @@
                      DAG.getConstant(C, DL, MVT::i32));
 }
 
+static SDValue performSDivPredCombine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
+
+  SDValue Op = N->getOperand(IsIntrinsic ? 3 : 2);
+
+  bool Negated;
+  int64_t SplatVal;
+  if (!isPow2Splat(Op, SplatVal, Negated) || !Negated)
+    return SDValue();
+
+  MVT ConstType =
+      VT.getVectorElementType() == MVT::i64 ? MVT::i64 : MVT::i32;
+
+  SDValue Pow2 = DAG.getConstant(-SplatVal, DL, ConstType);
+  SDValue Splat = DAG.getSplatVector(VT, DL, Pow2);
+  SDValue SDiv =
+      IsIntrinsic
+          ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, N->getOperand(0),
+                        N->getOperand(1), N->getOperand(2), Splat)
+          : DAG.getNode(AArch64ISD::SDIV_PRED, DL, VT, N->getOperand(0),
+                        N->getOperand(1), Splat);
+  SDValue ZeroSplat =
+      DAG.getSplatVector(VT, DL, DAG.getConstant(0, DL, ConstType));
+
+  if (IsIntrinsic)
+    return DAG.getNode(AArch64ISD::SUB_PRED, DL, VT, N->getOperand(1),
+                       ZeroSplat, SDiv);
+
+  // SDIV_PRED treats inactive lanes as undef, hence it is safe to ignore the
+  // predicate here.
+  return DAG.getNode(ISD::SUB, DL, VT, ZeroSplat, SDiv);
+}
+
 /// An EXTR instruction is made up of two shifts, ORed together. This helper
 /// searches for and classifies those shifts.
 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
@@ -15174,6 +15264,8 @@
   case Intrinsic::aarch64_sve_ptest_last:
     return getPTest(DAG, N->getValueType(0), N->getOperand(1),
                     N->getOperand(2), AArch64CC::LAST_ACTIVE);
+  case Intrinsic::aarch64_sve_sdiv:
+    return performSDivPredCombine(N, DAG);
   }
   return SDValue();
 }
@@ -17303,6 +17395,8 @@
     return performUzpCombine(N, DAG);
   case AArch64ISD::SETCC_MERGE_ZERO:
     return performSetccMergeZeroCombine(N, DAG);
+  case AArch64ISD::SDIV_PRED:
+    return performSDivPredCombine(N, DAG);
   case AArch64ISD::GLD1_MERGE_ZERO:
   case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
   case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
@@ -18710,6 +18804,11 @@
   if (EltVT == MVT::i32 || EltVT == MVT::i64)
     return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
 
+  bool Negated;
+  int64_t SplatVal;
+  if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated))
+    return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
+
   // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
   EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
   EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1575,11 +1575,12 @@
   defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr",  "ASR_ZPZI",  int_aarch64_sve_asr>;
   defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr",  "LSR_ZPZI",  int_aarch64_sve_lsr>;
   defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl",  "LSL_ZPZI",  int_aarch64_sve_lsl>;
-  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<    0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<    0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd, int_aarch64_sve_sdiv>;
 
-  defm ASR_ZPZI : sve_int_shift_pred_bhsd;
-  defm LSR_ZPZI : sve_int_shift_pred_bhsd;
-  defm LSL_ZPZI : sve_int_shift_pred_bhsd;
+  defm ASR_ZPZI  : sve_int_shift_pred_bhsd;
+  defm LSR_ZPZI  : sve_int_shift_pred_bhsd;
+  defm LSL_ZPZI  : sve_int_shift_pred_bhsd;
+  defm ASRD_ZPZI : sve_int_shift_imm_pred_bhsd<AArch64sdiv_p>;
 } // End HasSVEorStreamingSVE
 
 let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -155,6 +155,25 @@
   let ParserMatchClass = SVELogicalImmNotOperand<32>;
 }
 
+def IsPow2_32 : PatLeaf<(i32 imm), [{
+  uint32_t V = N->getZExtValue();
+  return isPowerOf2_32(V);
+}]>;
+
+def Log2_32 : SDNodeXForm<imm, [{
+  uint32_t V = N->getZExtValue();
+  return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
+}]>;
+
+def IsPow2_64 : PatLeaf<(i64 imm), [{
+  uint64_t V = N->getZExtValue();
+  return isPowerOf2_64(V);
+}]>;
+
+def Log2_64 : SDNodeXForm<imm, [{
+  uint64_t V = N->getZExtValue();
+  return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32);
+}]>;
+
 class SVEShiftedImmOperand<int ElementWidth, string Infix, string Predicate>
     : AsmOperandClass {
   let Name = "SVE" # Infix # "Imm" # ElementWidth;
@@ -490,6 +509,13 @@
   : Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
         (inst $Rn, i32:$imm)>;
 
+class SVE_Shift_DupImm_Div_Pat<ValueType vt, SDPatternOperator op,
+                               ValueType pt, ValueType it, PatLeaf pow,
+                               SDNodeXForm log, Instruction inst>
+  : Pat<(vt (op pt:$Pg, vt:$Op, (vt (AArch64dup (it (pow:$V)))))),
+        (inst $Pg, $Op, (log $V))>;
+
 class SVE_2_Op_Fp_Imm_Pat
 
 multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
-                                            SDPatternOperator op = null_frag> {
+                                            SDPatternOperator op = null_frag,
+                                            SDPatternOperator divOp = null_frag> {
   def _B : SVEPseudo2Instr<Ps # _B, 1>,
            sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
   def _H : SVEPseudo2Instr<Ps # _H, 1>,
@@ -5365,12 +5392,17 @@
   def : SVE_3_Op_Imm_Pat(NAME # _H)>;
   def : SVE_3_Op_Imm_Pat(NAME # _S)>;
   def : SVE_3_Op_Imm_Pat(NAME # _D)>;
+
+  def : SVE_Shift_DupImm_Div_Pat<nxv16i8, divOp, nxv16i1, i32, IsPow2_32, Log2_32, !cast<Instruction>(NAME # _B)>;
+  def : SVE_Shift_DupImm_Div_Pat<nxv8i16, divOp, nxv8i1,  i32, IsPow2_32, Log2_32, !cast<Instruction>(NAME # _H)>;
+  def : SVE_Shift_DupImm_Div_Pat<nxv4i32, divOp, nxv4i1,  i32, IsPow2_32, Log2_32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_Shift_DupImm_Div_Pat<nxv2i64, divOp, nxv2i1,  i64, IsPow2_64, Log2_64, !cast<Instruction>(NAME # _D)>;
 }
 
 // As above but shift amount takes the form of a "vector immediate".
 multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm,
                                                 string Ps, SDPatternOperator op>
-: sve_int_bin_pred_shift_imm_right {
+: sve_int_bin_pred_shift_imm_right {
   def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>;
   def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>;
   def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>;
@@ -8432,6 +8464,18 @@
   def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_D)>;
 }
 
+multiclass sve_int_shift_imm_pred_bhsd<SDPatternOperator op> {
+  def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  vecshiftR8,  FalseLanesUndef>;
+  def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesUndef>;
+  def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesUndef>;
+  def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesUndef>;
+
+  def : SVE_Shift_DupImm_Div_Pat<nxv16i8, op, nxv16i1, i32, IsPow2_32, Log2_32, !cast<Instruction>(NAME # _UNDEF_B)>;
+  def : SVE_Shift_DupImm_Div_Pat<nxv8i16, op, nxv8i1,  i32, IsPow2_32, Log2_32, !cast<Instruction>(NAME # _UNDEF_H)>;
+  def : SVE_Shift_DupImm_Div_Pat<nxv4i32, op, nxv4i1,  i32, IsPow2_32, Log2_32, !cast<Instruction>(NAME # _UNDEF_S)>;
+  def : SVE_Shift_DupImm_Div_Pat<nxv2i64, op, nxv2i1,  i64, IsPow2_64, Log2_64, !cast<Instruction>(NAME # _UNDEF_D)>;
+}
+
 multiclass sve_int_bin_pred_all_active_bhsd {
   def _UNDEF_B : PredTwoOpPseudo;
   def _UNDEF_H : PredTwoOpPseudo;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -0,0 +1,389 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
+; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
+; CHECK-LABEL: sdiv_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
+; CHECK-LABEL: sdiv_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
+  ret <16 x i8> %res
+}
+
+define void @sdiv_v32i8(<32 x i8>* %a) #0 {
+; CHECK-LABEL: sdiv_v32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl32
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <32 x i8>, <32 x i8>* %a
+  %res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
+  store <32 x i8> %res, <32 x i8>* %a
+  ret void
+}
+
+define void @sdiv_v64i8(<64 x i8>* %a) #0 {
+; VBITS_EQ_256-LABEL: sdiv_v64i8:
+; VBITS_EQ_256:       // %bb.0:
+; VBITS_EQ_256-NEXT:    mov w8, #32
+; VBITS_EQ_256-NEXT:    ptrue p0.b, vl32
+; VBITS_EQ_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
+; VBITS_EQ_256-NEXT:    ld1b { z1.b }, p0/z, [x0]
+; VBITS_EQ_256-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; VBITS_EQ_256-NEXT:    asrd z1.b, p0/m, z1.b, #5
+; VBITS_EQ_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
+; VBITS_EQ_256-NEXT:    st1b { z1.b }, p0, [x0]
+; VBITS_EQ_256-NEXT:    ret
+;
+; VBITS_GE_512-LABEL: sdiv_v64i8:
+; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
+; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_512-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
+; VBITS_GE_512-NEXT:    ret
+  %op1 = load <64 x i8>, <64 x i8>* %a
+  %res = sdiv <64 x i8> %op1, shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)
+  store <64 x i8> %res, <64 x i8>* %a
+  ret void
+}
+
+define void @sdiv_v128i8(<128 x i8>* %a) #0 {
+; VBITS_GE_1024-LABEL: sdiv_v128i8:
+; VBITS_GE_1024:       // %bb.0:
+; VBITS_GE_1024-NEXT:    ptrue p0.b, vl128
+; VBITS_GE_1024-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_1024-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; VBITS_GE_1024-NEXT:    st1b { z0.b }, p0, [x0]
+; VBITS_GE_1024-NEXT:    ret
+  %op1 = load <128 x i8>, <128 x i8>* %a
+  %res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
+  store <128 x i8> %res, <128 x i8>* %a
+  ret void
+}
+
+define void @sdiv_v256i8(<256 x i8>* %a) #0 {
+; VBITS_GE_2048-LABEL: sdiv_v256i8:
+; VBITS_GE_2048:       // %bb.0:
+; VBITS_GE_2048-NEXT:    ptrue p0.b, vl256
+; VBITS_GE_2048-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_2048-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; VBITS_GE_2048-NEXT:    st1b { z0.b }, p0, [x0]
+; VBITS_GE_2048-NEXT:    ret
+  %op1 = load <256 x i8>, <256 x i8>* %a
+  %res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
+  store <256 x i8> %res, <256 x i8>* %a
+  ret void
+}
+
+define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
+; CHECK-LABEL: sdiv_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
+  ret <4 x i16> %res
+}
+
+define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
+; CHECK-LABEL: sdiv_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
+  ret <8 x i16> %res
+}
+
+define void @sdiv_v16i16(<16 x i16>* %a) #0 {
+; CHECK-LABEL: sdiv_v16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h, vl16
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <16 x i16>, <16 x i16>* %a
+  %res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
+  store <16 x i16> %res, <16 x i16>* %a
+  ret void
+}
+
+define void @sdiv_v32i16(<32 x i16>* %a) #0 {
+; VBITS_EQ_256-LABEL: sdiv_v32i16:
+; VBITS_EQ_256:       // %bb.0:
+; VBITS_EQ_256-NEXT:    mov x8, #16
+; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
+; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
+; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
+; VBITS_EQ_256-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; VBITS_EQ_256-NEXT:    asrd z1.h, p0/m, z1.h, #5
+; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
+; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
+; VBITS_EQ_256-NEXT:    ret
+;
+; VBITS_GE_512-LABEL: sdiv_v32i16:
+; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
+; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_512-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
+; VBITS_GE_512-NEXT:    ret
+  %op1 = load <32 x i16>, <32 x i16>* %a
+  %res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)
+  store <32 x i16> %res, <32 x i16>* %a
+  ret void
+}
+
+define void @sdiv_v64i16(<64 x i16>* %a) #0 {
+; VBITS_GE_1024-LABEL: sdiv_v64i16:
+; VBITS_GE_1024:       // %bb.0:
+; VBITS_GE_1024-NEXT:    ptrue p0.h, vl64
+; VBITS_GE_1024-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_1024-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; VBITS_GE_1024-NEXT:    st1h { z0.h }, p0, [x0]
+; VBITS_GE_1024-NEXT:    ret
+  %op1 = load <64 x i16>, <64 x i16>* %a
+  %res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
+  store <64 x i16> %res, <64 x i16>* %a
+  ret void
+}
+
+define void @sdiv_v128i16(<128 x i16>* %a) #0 {
+; VBITS_GE_2048-LABEL: sdiv_v128i16:
+; VBITS_GE_2048:       // %bb.0:
+; VBITS_GE_2048-NEXT:    ptrue p0.h, vl128
+; VBITS_GE_2048-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_2048-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; VBITS_GE_2048-NEXT:    st1h { z0.h }, p0, [x0]
+; VBITS_GE_2048-NEXT:    ret
+  %op1 = load <128 x i16>, <128 x i16>* %a
+  %res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
+  store <128 x i16> %res, <128 x i16>* %a
+  ret void
+}
+
+define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
+; CHECK-LABEL: sdiv_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
+  ret <2 x i32> %res
+}
+
+define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
+; CHECK-LABEL: sdiv_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
+  ret <4 x i32> %res
+}
+
+define void @sdiv_v8i32(<8 x i32>* %a) #0 {
+; CHECK-LABEL: sdiv_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <8 x i32>, <8 x i32>* %a
+  %res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
+  store <8 x i32> %res, <8 x i32>* %a
+  ret void
+}
+
+define void @sdiv_v16i32(<16 x i32>* %a) #0 {
+; VBITS_EQ_256-LABEL: sdiv_v16i32:
+; VBITS_EQ_256:       // %bb.0:
+; VBITS_EQ_256-NEXT:    mov x8, #8
+; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
+; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
+; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
+; VBITS_EQ_256-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; VBITS_EQ_256-NEXT:    asrd z1.s, p0/m, z1.s, #5
+; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
+; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
+; VBITS_EQ_256-NEXT:    ret
+;
+; VBITS_GE_512-LABEL: sdiv_v16i32:
+; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
+; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_512-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
+; VBITS_GE_512-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @sdiv_v32i32(<32 x i32>* %a) #0 {
+; VBITS_GE_1024-LABEL: sdiv_v32i32:
+; VBITS_GE_1024:       // %bb.0:
+; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
+; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_1024-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x0]
+; VBITS_GE_1024-NEXT:    ret
+  %op1 = load <32 x i32>, <32 x i32>* %a
+  %res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
+  store <32 x i32> %res, <32 x i32>* %a
+  ret void
+}
+
+define void @sdiv_v64i32(<64 x i32>* %a) #0 {
+; VBITS_GE_2048-LABEL: sdiv_v64i32:
+; VBITS_GE_2048:       // %bb.0:
+; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
+; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_2048-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x0]
+; VBITS_GE_2048-NEXT:    ret
+  %op1 = load <64 x i32>, <64 x i32>* %a
+  %res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
+  store <64 x i32> %res, <64 x i32>* %a
+  ret void
+}
+
+define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
+; CHECK-LABEL: sdiv_v1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
+  ret <1 x i64> %res
+}
+
+; Vector i64 sdiv is not legal for NEON, so use SVE when available.
+define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
+; CHECK-LABEL: sdiv_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    ret
+  %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
+  ret <2 x i64> %res
+}
+
+define void @sdiv_v4i64(<4 x i64>* %a) #0 {
+; CHECK-LABEL: sdiv_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <4 x i64>, <4 x i64>* %a
+  %res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
+  store <4 x i64> %res, <4 x i64>* %a
+  ret void
+}
+
+define void @sdiv_v8i64(<8 x i64>* %a) #0 {
+; VBITS_EQ_256-LABEL: sdiv_v8i64:
+; VBITS_EQ_256:       // %bb.0:
+; VBITS_EQ_256-NEXT:    mov x8, #4
+; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
+; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
+; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
+; VBITS_EQ_256-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; VBITS_EQ_256-NEXT:    asrd z1.d, p0/m, z1.d, #5
+; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
+; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
+; VBITS_EQ_256-NEXT:    ret
+;
+; VBITS_GE_512-LABEL: sdiv_v8i64:
+; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; VBITS_GE_512-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
+; VBITS_GE_512-NEXT:    ret
+  %op1 = load <8 x i64>, <8 x i64>* %a
+  %res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)
+  store <8 x i64> %res, <8 x i64>* %a
+  ret void
+}
+
+define void @sdiv_v16i64(<16 x i64>* %a) #0 {
+; VBITS_GE_1024-LABEL: sdiv_v16i64:
+; VBITS_GE_1024:       // %bb.0:
+; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; VBITS_GE_1024-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x0]
+; VBITS_GE_1024-NEXT:    ret
+  %op1 = load <16 x i64>, <16 x i64>* %a
+  %res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
+  store <16 x i64> %res, <16 x i64>* %a
+  ret void
+}
+
+define void @sdiv_v32i64(<32 x i64>* %a) #0 {
+; VBITS_GE_2048-LABEL: sdiv_v32i64:
+; VBITS_GE_2048:       // %bb.0:
+; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
+; VBITS_GE_2048-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; VBITS_GE_2048-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x0]
+; VBITS_GE_2048-NEXT:    ret
+  %op1 = load <32 x i64>, <32 x i64>* %a
+  %res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
+  store <32 x i64> %res, <32 x i64>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sdiv-pow2.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sdiv-pow2.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: sdiv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #23
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: sdiv_i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #25
+; CHECK-NEXT:    subr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -33554432, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sdiv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #53
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9007199254740992, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sdiv_i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #0 // =0x0
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #55
+; CHECK-NEXT:    subr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -36028797018963968, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sdiv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #4
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 16, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @sdiv_i8_neg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sdiv_i8_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #6
+; CHECK-NEXT:    subr z0.b, z0.b, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -64, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sdiv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #10
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1024, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @sdiv_i16_neg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sdiv_i16_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #12
+; CHECK-NEXT:    subr z0.h, z0.h, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -4096, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: sdiv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #23
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: sdiv_i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #25
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -33554432, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: sdiv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #53
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9007199254740992, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: sdiv_i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #55
+; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
+; CHECK-NEXT:    ret
+  %out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -36028797018963968, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
+attributes #0 = { "target-features"="+sve" }
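
A note on why a single ASRD covers the positive cases above: sdiv truncates toward zero, while a plain arithmetic shift rounds toward negative infinity, so ASRD biases negative inputs by 2^k - 1 before shifting. The C++ sketch below is a scalar model of the per-lane computation, for illustration only; it is not part of the patch, and the function name asrd_model is made up.

#include <cassert>
#include <cstdint>

// Scalar model of SVE ASRD: arithmetic shift right with round toward zero.
// Assumes k < 31; >> on a negative left operand is arithmetic (guaranteed
// since C++20, and on all AArch64 ABIs in practice).
int32_t asrd_model(int32_t x, unsigned k) {
  int32_t bias = x < 0 ? (int32_t(1) << k) - 1 : 0; // bias negatives by 2^k-1
  return (x + bias) >> k;
}

int main() {
  assert(asrd_model(20, 2) == 5);   // 20 / 4
  assert(asrd_model(-20, 2) == -5); // -20 / 4
  assert(asrd_model(-21, 2) == -5); // truncates toward zero, not to -6
  // Negated powers of two reuse the same shift followed by a subtract from
  // zero, which is what performSDivPredCombine emits (the asrd + subr pairs
  // in the CHECK lines above):
  assert(0 - asrd_model(-21, 2) == 5); // -21 / -4
  return 0;
}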