Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2770,6 +2770,19 @@
                  LLVMMatchType<0>, llvm_i32_ty],
                 [ImmArg<ArgIndex<6>>]>;
 
+  class SME2_VG2_Multi_Imm_Intrinsic
+    : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
+                [llvm_anyvector_ty, LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+  class SME2_VG4_Multi_Imm_Intrinsic
+    : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>],
+                [llvm_anyvector_ty, LLVMMatchType<0>,
+                 LLVMMatchType<0>, LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+
   class SME2_ZA_Write_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
@@ -2839,6 +2852,24 @@
   def int_aarch64_sme_fmla_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
   def int_aarch64_sme_fmls_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
 
+  // Multi-vector saturating rounding shift right intrinsics
+
+  def int_aarch64_sve_sqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_uqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_sqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_uqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+  def int_aarch64_sve_sqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_uqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_sqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_uqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+  def int_aarch64_sve_sqrshru_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_sqrshru_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+  def int_aarch64_sve_sqrshrun_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+  def int_aarch64_sve_sqrshrun_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
   //
   // Multi-vector multiply-add/subtract long
   //
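Note on the two new classes (reviewer annotation, not part of the patch text): the result type is derived from the overloaded source vector type, subdivide-by-two for the x2 forms and subdivide-by-four for the x4 forms, i.e. half/quarter element width at twice/four times the element count, and the trailing i32 shift amount carries ImmArg, so callers must pass a compile-time constant. As exercised by the new tests further down, the sqrshr instances come out as:

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshr.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)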
Index: llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -619,18 +619,18 @@
 defm MOVA_VG2_2ZMXI : sme2_mova_array_to_vec_vg2_multi<0b000, "mova">;
 defm MOVA_VG4_4ZMXI : sme2_mova_array_to_vec_vg4_multi<0b1000, "mova">;
 
-defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0>;
-defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000>;
+defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0, int_aarch64_sve_sqrshr_x2>;
+defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000, int_aarch64_sve_sqrshr_x4>;
 
-defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1>;
-defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001>;
+defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1, int_aarch64_sve_uqrshr_x2>;
+defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001, int_aarch64_sve_uqrshr_x4>;
 
-defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0>;
-defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010>;
+defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0, int_aarch64_sve_sqrshru_x2>;
+defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010, int_aarch64_sve_sqrshru_x4>;
 
-defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100>;
-defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101>;
-defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110>;
+defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100, int_aarch64_sve_sqrshrn_x4>;
+defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_x4>;
+defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_x4>;
 
 defm SEL_VG2_2ZP2Z2Z: sme2_sel_vector_vg2<"sel">;
 defm SEL_VG4_4ZP4Z4Z: sme2_sel_vector_vg4<"sel">;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3694,9 +3694,9 @@
 defm SQCVTN_Z2Z_StoH  : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>;
 defm UQCVTN_Z2Z_StoH  : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>;
 defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>;
-defm SQRSHRN_Z2ZI_StoH  : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101>;
-defm UQRSHRN_Z2ZI_StoH  : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111>;
-defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001>;
+defm SQRSHRN_Z2ZI_StoH  : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>;
+defm UQRSHRN_Z2ZI_StoH  : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>;
+defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
 
 // Load to two registers
 def LD1B_2Z : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>;
Index: llvm/lib/Target/AArch64/SMEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -133,6 +133,15 @@
                          (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
                          zpr_ty:$Zm, imm_ty:$i)>;
 
+class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
+    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
+          (!cast<Instruction>(name) (REG_SEQUENCE ZPR2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
+
+class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
+    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
+          (!cast<Instruction>(name) (REG_SEQUENCE ZPR4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
+          imm_ty:$i)>;
+
 class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
     : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
           (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
@@ -3918,7 +3927,7 @@
 //===----------------------------------------------------------------------===//
 // SME2 multi-vec saturating shift right narrow
 class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
-    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
+    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
         mnemonic, "\t$Zd, $Zn, $imm4",
         "", []>, Sched<[]> {
   bits<4> imm4;
@@ -3933,8 +3942,10 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> {
+multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
   def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
+
+  def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
 }
 
 class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
@@ -3956,18 +3967,21 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op> {
-  def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32,
+multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+  def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
                                      mnemonic>{
     bits<5> imm;
     let Inst{20-16} = imm;
   }
-  def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64,
+  def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
                                      mnemonic> {
     bits<6> imm;
    let Inst{22}    = imm{5};
     let Inst{20-16} = imm{4-0};
   }
+
+  def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
+  def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -549,6 +549,10 @@
   : Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))),
         (inst $Op1, $Op2, $Op3)>;
 
+class SVE2p1_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
+    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
+          (!cast<Instruction>(name) (REG_SEQUENCE ZPR2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
+
 class SVE2p1_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
     : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
           (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
@@ -8872,7 +8876,7 @@
 
 // SVE2 multi-vec shift narrow
 class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
-    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
+    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
         mnemonic, "\t$Zd, $Zn, $imm4",
         "", []>, Sched<[]> {
   bits<5> Zd;
@@ -8891,10 +8895,11 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc> {
-  def : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
-}
+multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
+  def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
+  def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+}
 
 // SME2 multi-vec contiguous load (scalar plus scalar, two registers)
 class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
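As a concrete reading of the plumbing above (illustrative expansion only, not text from the patch): for `defm SQRSHR_VG2_Z2ZI` in AArch64SMEInstrInfo.td, `NAME # _H` resolves to the SQRSHR_VG2_Z2ZI_H instruction, so the VG2 pattern instantiates to roughly:

def : Pat<(nxv8i16 (int_aarch64_sve_sqrshr_x2 nxv4i32:$Zn1, nxv4i32:$Zn2, (i32 tvecshiftR16:$i))),
          (SQRSHR_VG2_Z2ZI_H (REG_SEQUENCE ZPR2, nxv4i32:$Zn1, zsub0, nxv4i32:$Zn2, zsub1), tvecshiftR16:$i)>;

The switch from the vecshiftR* operands to tvecshiftR* in the instruction definitions is what lets these patterns match: because of ImmArg, the intrinsic's shift amount reaches instruction selection as a TargetConstant, and only the tvecshift operands (TImmLeaf-based) match target constants.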
Index: llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+bf16 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; S/UQRSHR x2
+;
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_x2_s16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_x2_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    sqrshr z0.h, { z0.s, z1.s }, #16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x2.nxv8i16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, i32 16)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_x2_u16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_x2_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    uqrshr z0.h, { z0.s, z1.s }, #16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x2.nxv8i16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, i32 16)
+  ret <vscale x 8 x i16> %res
+}
+
+;
+; S/UQRSHR x4
+;
+
+define <vscale x 16 x i8> @multi_vector_sat_shift_narrow_x4_s8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshr z0.b, { z0.s - z3.s }, #32
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshr.x4.nxv16i8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, i32 32)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_x4_s16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshr z0.h, { z0.d - z3.d }, #64
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x4.nxv8i16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, i32 64)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 16 x i8> @multi_vector_sat_shift_narrow_x4_u8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    uqrshr z0.b, { z0.s - z3.s }, #32
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshr.x4.nxv16i8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, i32 32)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_x4_u16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    uqrshr z0.h, { z0.d - z3.d }, #64
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x4.nxv8i16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, i32 64)
+  ret <vscale x 8 x i16> %res
+}
+
+; S/UQRSHRN x4
+
+define <vscale x 16 x i8> @multi_vector_sat_shift_narrow_interleave_x4_s8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshrn z0.b, { z0.s - z3.s }, #32
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x4.nxv16i8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, i32 32)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_interleave_x4_s16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshrn z0.h, { z0.d - z3.d }, #64
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x4.nxv8i16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, i32 64)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 16 x i8> @multi_vector_sat_shift_narrow_interleave_x4_u8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    uqrshrn z0.b, { z0.s - z3.s }, #32
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x4.nxv16i8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, i32 32)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_interleave_x4_u16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    uqrshrn z0.h, { z0.d - z3.d }, #64
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrn.x4.nxv8i16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, i32 64)
+  ret <vscale x 8 x i16> %res
+}
+
+; SQRSHRU x2
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_unsigned_narrow_x2_u16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_x2_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    sqrshru z0.h, { z0.s, z1.s }, #16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x2.nxv8i16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, i32 16)
+  ret <vscale x 8 x i16> %res
+}
+
+; SQRSHRU x4
+
+define <vscale x 16 x i8> @multi_vector_sat_shift_unsigned_narrow_x4_u8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_x4_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshru z0.b, { z0.s - z3.s }, #32
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshru.x4.nxv16i8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, i32 32)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_unsigned_narrow_x4_u16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_x4_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshru z0.h, { z0.d - z3.d }, #64
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x4.nxv8i16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, i32 64)
+  ret <vscale x 8 x i16> %res
+}
+
+; SQRSHRUN x4
+
+define <vscale x 16 x i8> @multi_vector_sat_shift_unsigned_narrow_interleave_x4_u8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_x4_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshrun z0.b, { z0.s - z3.s }, #32
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x4.nxv16i8(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, i32 32)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_unsigned_narrow_interleave_x4_u16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_x4_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    sqrshrun z0.h, { z0.d - z3.d }, #64
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrun.x4.nxv8i16(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, i32 64)
+  ret <vscale x 8 x i16> %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshr.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x4.nxv8i16(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshr.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x4.nxv8i16(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x4.nxv8i16(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrn.x4.nxv8i16(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshru.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x4.nxv8i16(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x4.nxv16i8(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrun.x4.nxv8i16(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qrshr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qrshr.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sme2 -mattr=+bf16 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; S/UQRSHRN x2
+;
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_interleave_x2_s16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x2_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    sqrshrn z0.h, { z0.s, z1.s }, #16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, i32 16)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_narrow_interleave_x2_u16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x2_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    uqrshrn z0.h, { z0.s, z1.s }, #16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, i32 16)
+  ret <vscale x 8 x i16> %res
+}
+
+;
+; SQRSHRUN x2
+;
+
+define <vscale x 8 x i16> @multi_vector_sat_shift_unsigned_narrow_interleave_x2_s16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_x2_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    sqrshrun z0.h, { z0.s, z1.s }, #16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, i32 16)
+  ret <vscale x 8 x i16> %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
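Usage sketch (not part of the patch): the tests above all pass the maximum shift for each element size, but any constant in the operand's range selects the same instruction; for the .s-to-.h forms, the range implied by tvecshiftR16 is 1 to 16. The function name below is hypothetical.

; Narrow and interleave two 32-bit vectors with a rounding right shift of 8;
; with this patch applied, expected to select: sqrshrn z0.h, { z0.s, z1.s }, #8
define <vscale x 8 x i16> @narrow_by_8(<vscale x 4 x i32> %lo, <vscale x 4 x i32> %hi) {
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 4 x i32> %lo, <vscale x 4 x i32> %hi, i32 8)
  ret <vscale x 8 x i16> %res
}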