Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2770,6 +2770,19 @@ LLVMMatchType<0>, llvm_i32_ty], [ImmArg>]>; + class SME2_VG2_Multi_Imm_Intrinsic + : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], + [llvm_anyvector_ty, LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + + class SME2_VG4_Multi_Imm_Intrinsic + : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], + [llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + class SME2_ZA_Write_VG2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -2902,6 +2915,24 @@ def int_aarch64_sme_fmla_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; def int_aarch64_sme_fmls_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; + // Multi-vector saturating rounding shift right intrinsics + + def int_aarch64_sve_sqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic; + + def int_aarch64_sve_sqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic; + + def int_aarch64_sve_sqrshru_x2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshru_x4 : SME2_VG4_Multi_Imm_Intrinsic; + + def int_aarch64_sve_sqrshrun_x2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshrun_x4 : SME2_VG4_Multi_Imm_Intrinsic; + // // Multi-vector multiply-add/subtract long // Index: llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -619,18 +619,18 @@ defm MOVA_VG2_2ZMXI : sme2_mova_array_to_vec_vg2_multi<0b000, "mova">; defm MOVA_VG4_4ZMXI : sme2_mova_array_to_vec_vg4_multi<0b1000, "mova">; -defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0>; -defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000>; +defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0, int_aarch64_sve_sqrshr_x2>; +defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000, int_aarch64_sve_sqrshr_x4>; -defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1>; -defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001>; +defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1, int_aarch64_sve_uqrshr_x2>; +defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001, int_aarch64_sve_uqrshr_x4>; -defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0>; -defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010>; +defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0, int_aarch64_sve_sqrshru_x2>; +defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010, int_aarch64_sve_sqrshru_x4>; -defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100>; -defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101>; -defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110>; +defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100, int_aarch64_sve_sqrshrn_x4>; +defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_x4>; +defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_x4>; defm SEL_VG2_2ZP2Z2Z: sme2_sel_vector_vg2<"sel">; defm SEL_VG4_4ZP4Z4Z: sme2_sel_vector_vg4<"sel">; Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3694,9 +3694,9 @@ defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>; defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>; defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>; -defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101>; -defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111>; -defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001>; +defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; +defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>; +defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>; // Load to two registers def LD1B_2Z : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>; Index: llvm/lib/Target/AArch64/SMEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SMEInstrFormats.td +++ llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -153,6 +153,15 @@ (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), zpr_ty:$Zm, imm_ty:$i)>; +class SME2_Sat_Shift_VG2_Pat + : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), + (!cast(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; + +class SME2_Sat_Shift_VG4_Pat + : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))), + (!cast(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3), + imm_ty:$i)>; + class SME2_Cvt_VG4_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)), (!cast(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>; @@ -3999,7 +4008,7 @@ //===----------------------------------------------------------------------===// // SME2 multi-vec saturating shift right narrow class sme2_sat_shift_vector_vg2 - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<4> imm4; @@ -4014,8 +4023,10 @@ let Inst{4-0} = Zd; } -multiclass sme2_sat_shift_vector_vg2 { +multiclass sme2_sat_shift_vector_vg2 { def _H : sme2_sat_shift_vector_vg2; + + def : SME2_Sat_Shift_VG2_Pat; } class sme2_sat_shift_vector_vg4 sz, bits<3> op, ZPRRegOp zpr_ty, @@ -4037,18 +4048,21 @@ let Inst{4-0} = Zd; } -multiclass sme2_sat_shift_vector_vg4 op> { - def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32, +multiclass sme2_sat_shift_vector_vg4 op, SDPatternOperator intrinsic> { + def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, mnemonic>{ bits<5> imm; let Inst{20-16} = imm; } - def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64, + def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, mnemonic> { bits<6> imm; let Inst{22} = imm{5}; let Inst{20-16} = imm{4-0}; } + + def : SME2_Sat_Shift_VG4_Pat; + def : SME2_Sat_Shift_VG4_Pat; } //===----------------------------------------------------------------------===// Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -549,6 +549,10 @@ : Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))), (inst $Op1, $Op2, $Op3)>; +class SVE2p1_Sat_Shift_VG2_Pat + : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), + (!cast(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; + class SVE2p1_Cvt_VG2_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)), (!cast(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>; @@ -8872,7 +8876,7 @@ // SVE2 multi-vec shift narrow class sve2p1_multi_vec_shift_narrow opc, bits<2> tsz> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<5> Zd; @@ -8891,10 +8895,11 @@ let Inst{4-0} = Zd; } -multiclass sve2p1_multi_vec_shift_narrow opc> { - def : sve2p1_multi_vec_shift_narrow; -} +multiclass sve2p1_multi_vec_shift_narrow opc, SDPatternOperator intrinsic> { + def NAME : sve2p1_multi_vec_shift_narrow; + def : SVE2p1_Sat_Shift_VG2_Pat; +} // SME2 multi-vec contiguous load (scalar plus scalar, two registers) class sve2p1_mem_cld_ss_2z msz, bit n, Index: llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll @@ -0,0 +1,231 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; +; S/UQRSHR x2 +; + +define @multi_vector_sat_shift_narrow_x2_s16( %unused, %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: sqrshr z0.h, { z2.s, z3.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshr.x2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +define @multi_vector_sat_shift_narrow_x2_u16( %unused, %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: uqrshr z0.h, { z2.s, z3.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshr.x2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +; +; S/UQRSHR x4 +; + +define @multi_vector_sat_shift_narrow_x4_s8( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshr z0.b, { z4.s - z7.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshr.x4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_x4_s16( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshr z0.h, { z4.d - z7.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshr.x4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +define @multi_vector_sat_shift_narrow_x4_u8( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: uqrshr z0.b, { z4.s - z7.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshr.x4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_x4_u16( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: uqrshr z0.h, { z4.d - z7.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshr.x4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +; S/UQRSHRN x4 + +define @multi_vector_sat_shift_narrow_interleave_x4_s8( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshrn z0.b, { z4.s - z7.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrn.x4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_x4_s16( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshrn z0.h, { z4.d - z7.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrn.x4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_x4_u8( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: uqrshrn z0.b, { z4.s - z7.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshrn.x4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_x4_u16( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: uqrshrn z0.h, { z4.d - z7.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshrn.x4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +; SQRSHRU x2 + +define @multi_vector_sat_shift_unsigned_narrow_x2_u16( %unused, %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: sqrshru z0.h, { z2.s, z3.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshru.x2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +; SQRSHRU x4 + +define @multi_vector_sat_shift_unsigned_narrow_x4_u8( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshru z0.b, { z4.s - z7.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshru.x4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_unsigned_narrow_x4_u16( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshru z0.h, { z4.d - z7.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshru.x4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +; SQRSHRUN x4 + +define @multi_vector_sat_shift_unsigned_narrow_interleave_x4_u8( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshrun z0.b, { z4.s - z7.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrun.x4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_unsigned_narrow_interleave_x4_u16( %unused, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqrshrun z0.h, { z4.d - z7.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrun.x4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +declare @llvm.aarch64.sve.sqrshr.x2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshr.x4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshr.x4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.uqrshr.x2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.uqrshr.x4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.uqrshr.x4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.sqrshrn.x4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshrn.x4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.uqrshrn.x4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.uqrshrn.x4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.sqrshru.x2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshru.x4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshru.x4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.sqrshrun.x4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshrun.x4.nxv8i16(, , , , i32) Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qrshr.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qrshr.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s + +; +; S/UQRSHRN x2 +; + +define @multi_vector_sat_shift_narrow_interleave_x2_s16( %unused, %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: sqrshrn z0.h, { z2.s, z3.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrn.x2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_x2_u16( %unused, %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: uqrshrn z0.h, { z2.s, z3.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshrn.x2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +; +; SQRSHRUN x2 +; + +define @multi_vector_sat_shift_unsigned_narrow_interleave_x2_s16( %unused, %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: sqrshrun z0.h, { z2.s, z3.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrun.x2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +declare @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(, , i32) +declare @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(, , i32)