diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1021,6 +1021,17 @@
                 [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
                 [IntrNoMem]>;
 
+  class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
+      : Intrinsic<[LLVMSubdivide2VectorType<0>],
+                  [llvm_anyvector_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>]>;
+
+  class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
+      : Intrinsic<[LLVMSubdivide2VectorType<0>],
+                  [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty,
+                   llvm_i32_ty],
+                  [IntrNoMem, ImmArg<2>]>;
+
   // NOTE: There is no relationship between these intrinsics beyond an attempt
   // to reuse currently identical class definitions.
   class AdvSIMD_SVE_LOGB_Intrinsic  : AdvSIMD_SVE_CNT_Intrinsic;
@@ -1559,4 +1570,32 @@
 def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
 def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+// Narrowing shift right
+def int_aarch64_sve_shrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_shrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_rshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_rshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input/output
+def int_aarch64_sve_sqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - unsigned input/output
+def int_aarch64_sve_uqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_uqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input, unsigned output
+def int_aarch64_sve_sqshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -624,6 +624,30 @@
   let ParserMatchClass = Imm1_32Operand;
 }
 
+// Same as vecshiftR#N, but use TargetConstant (TImmLeaf) instead of Constant
+// (ImmLeaf)
+def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+  let EncoderMethod = "getVecShiftR8OpValue";
+  let DecoderMethod = "DecodeVecShiftR8Imm";
+  let ParserMatchClass = Imm1_8Operand;
+}
+def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+  let EncoderMethod = "getVecShiftR16OpValue";
+  let DecoderMethod = "DecodeVecShiftR16Imm";
+  let ParserMatchClass = Imm1_16Operand;
+}
+def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+  let EncoderMethod = "getVecShiftR32OpValue";
+  let DecoderMethod = "DecodeVecShiftR32Imm";
+  let ParserMatchClass = Imm1_32Operand;
+}
+
 def Imm0_1Operand : AsmImmRange<0, 1>;
 def Imm0_7Operand : AsmImmRange<0, 7>;
 def Imm0_15Operand : AsmImmRange<0, 15>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1426,24 +1426,24 @@
   defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
 
   // SVE2 bitwise shift right narrow (bottom)
-  defm SQSHRUNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
-  defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
-  defm SHRNB_ZZI     : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
-  defm RSHRNB_ZZI    : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
-  defm SQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
-  defm SQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
-  defm UQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
-  defm UQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
+  defm SQSHRUNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb",  int_aarch64_sve_sqshrunb>;
+  defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>;
+  defm SHRNB_ZZI     : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb",     int_aarch64_sve_shrnb>;
+  defm RSHRNB_ZZI    : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb",    int_aarch64_sve_rshrnb>;
+  defm SQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb",   int_aarch64_sve_sqshrnb>;
+  defm SQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb",  int_aarch64_sve_sqrshrnb>;
+  defm UQSHRNB_ZZI   : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb",   int_aarch64_sve_uqshrnb>;
+  defm UQRSHRNB_ZZI  : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb",  int_aarch64_sve_uqrshrnb>;
 
   // SVE2 bitwise shift right narrow (top)
-  defm SQSHRUNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
-  defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
-  defm SHRNT_ZZI     : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
-  defm RSHRNT_ZZI    : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
-  defm SQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
-  defm SQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
-  defm UQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
-  defm UQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
+  defm SQSHRUNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt",  int_aarch64_sve_sqshrunt>;
+  defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>;
+  defm SHRNT_ZZI     : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt",     int_aarch64_sve_shrnt>;
+  defm RSHRNT_ZZI    : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt",    int_aarch64_sve_rshrnt>;
+  defm SQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt",   int_aarch64_sve_sqshrnt>;
+  defm SQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt",  int_aarch64_sve_sqrshrnt>;
+  defm UQSHRNT_ZZI   : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt",   int_aarch64_sve_uqshrnt>;
+  defm UQRSHRNT_ZZI  : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt",  int_aarch64_sve_uqrshrnt>;
 
   // SVE2 integer add/subtract narrow high part (bottom)
   defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,6 +334,11 @@
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
       (inst $Op1, $Op2, $Op3, $Op4)>;
 
+class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                       ValueType vt2, Operand ImmTy, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
+      (inst $Op1, ImmTy:$Op2)>;
+
 class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                        ValueType vt2, ValueType vt3, Operand ImmTy,
                        Instruction inst>
@@ -2965,17 +2970,21 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
+                                                      SDPatternOperator op> {
   def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
-                                                vecshiftR8>;
+                                                tvecshiftR8>;
   def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
-                                                vecshiftR16> {
+                                                tvecshiftR16> {
     let Inst{19} = imm{3};
   }
   def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
                                                 vecshiftR32> {
     let Inst{20-19} = imm{4-3};
   }
+  def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
 }
 
 class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -3001,17 +3010,21 @@
   let Constraints = "$Zd = $_Zd";
 }
 
-multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
+                                                   SDPatternOperator op> {
  def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
-                                             vecshiftR8>;
+                                             tvecshiftR8>;
  def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
-                                             vecshiftR16> {
+                                             tvecshiftR16> {
    let Inst{19} = imm{3};
  }
  def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
                                              vecshiftR32> {
    let Inst{20-19} = imm{4-3};
  }
+  def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
 }
 
 class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
@@ -0,0 +1,512 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SHRNB
+;
+
+define <vscale x 16 x i8> @shrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: shrnb_h:
+; CHECK: shrnb z0.b, z0.h, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                 i32 8)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: shrnb_s:
+; CHECK: shrnb z0.h, z0.s, #16
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                 i32 16)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: shrnb_d:
+; CHECK: shrnb z0.s, z0.d, #32
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                 i32 32)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQSHRNB
+;
+
+define <vscale x 16 x i8> @uqshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshrnb_h:
+; CHECK: uqshrnb z0.b, z0.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshrnb_s:
+; CHECK: uqshrnb z0.h, z0.s, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshrnb_d:
+; CHECK: uqshrnb z0.s, z0.d, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   i32 1)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRNB
+;
+
+define <vscale x 16 x i8> @sqshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshrnb_h:
+; CHECK: sqshrnb z0.b, z0.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshrnb_s:
+; CHECK: sqshrnb z0.h, z0.s, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshrnb_d:
+; CHECK: sqshrnb z0.s, z0.d, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   i32 1)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRUNB
+;
+
+define <vscale x 16 x i8> @sqshrunb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshrunb_h:
+; CHECK: sqshrunb z0.b, z0.h, #7
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    i32 7)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrunb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshrunb_s:
+; CHECK: sqshrunb z0.h, z0.s, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    i32 15)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrunb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshrunb_d:
+; CHECK: sqshrunb z0.s, z0.d, #31
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    i32 31)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQRSHRNB
+;
+
+define <vscale x 16 x i8> @uqrshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqrshrnb_h:
+; CHECK: uqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqrshrnb_s:
+; CHECK: uqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqrshrnb_d:
+; CHECK: uqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRNB
+;
+
+define <vscale x 16 x i8> @sqrshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqrshrnb_h:
+; CHECK: sqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqrshrnb_s:
+; CHECK: sqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqrshrnb_d:
+; CHECK: sqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRUNB
+;
+
+define <vscale x 16 x i8> @sqrshrunb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqrshrunb_h:
+; CHECK: sqrshrunb z0.b, z0.h, #6
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                     i32 6)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrunb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqrshrunb_s:
+; CHECK: sqrshrunb z0.h, z0.s, #14
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                     i32 14)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrunb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqrshrunb_d:
+; CHECK: sqrshrunb z0.s, z0.d, #30
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                     i32 30)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SHRNT
+;
+
+define <vscale x 16 x i8> @shrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: shrnt_h:
+; CHECK: shrnt z0.b, z1.h, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                 <vscale x 8 x i16> %b,
+                                                                 i32 3)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: shrnt_s:
+; CHECK: shrnt z0.h, z1.s, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                 <vscale x 4 x i32> %b,
+                                                                 i32 3)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: shrnt_d:
+; CHECK: shrnt z0.s, z1.d, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                 <vscale x 2 x i64> %b,
+                                                                 i32 3)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQSHRNT
+;
+
+define <vscale x 16 x i8> @uqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshrnt_h:
+; CHECK: uqshrnt z0.b, z1.h, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 8 x i16> %b,
+                                                                   i32 5)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshrnt_s:
+; CHECK: uqshrnt z0.h, z1.s, #13
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 4 x i32> %b,
+                                                                   i32 13)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshrnt_d:
+; CHECK: uqshrnt z0.s, z1.d, #29
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 2 x i64> %b,
+                                                                   i32 29)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRNT
+;
+
+define <vscale x 16 x i8> @sqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshrnt_h:
+; CHECK: sqshrnt z0.b, z1.h, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 8 x i16> %b,
+                                                                   i32 5)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshrnt_s:
+; CHECK: sqshrnt z0.h, z1.s, #13
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 4 x i32> %b,
+                                                                   i32 13)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshrnt_d:
+; CHECK: sqshrnt z0.s, z1.d, #29
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 2 x i64> %b,
+                                                                   i32 29)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRUNT
+;
+
+define <vscale x 16 x i8> @sqshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshrunt_h:
+; CHECK: sqshrunt z0.b, z1.h, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 4)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshrunt_s:
+; CHECK: sqshrunt z0.h, z1.s, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 4)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshrunt_d:
+; CHECK: sqshrunt z0.s, z1.d, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 4)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQRSHRNT
+;
+
+define <vscale x 16 x i8> @uqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshrnt_h:
+; CHECK: uqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 8)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshrnt_s:
+; CHECK: uqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 12)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshrnt_d:
+; CHECK: uqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 28)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRNT
+;
+
+define <vscale x 16 x i8> @sqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshrnt_h:
+; CHECK: sqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 8)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshrnt_s:
+; CHECK: sqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 12)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshrnt_d:
+; CHECK: sqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 28)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRUNT
+;
+
+define <vscale x 16 x i8> @sqrshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshrunt_h:
+; CHECK: sqrshrunt z0.b, z1.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                     <vscale x 8 x i16> %b,
+                                                                     i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshrunt_s:
+; CHECK: sqrshrunt z0.h, z1.s, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                     <vscale x 4 x i32> %b,
+                                                                     i32 5)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshrunt_d:
+; CHECK: sqrshrunt z0.s, z1.d, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %b,
+                                                                     i32 5)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)