diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -819,6 +819,29 @@
                  llvm_nxv2i64_ty],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_Saturating_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_SaturatingWithPattern_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 llvm_i32_ty,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_Saturating_N_Intrinsic<LLVMType T>
+    : Intrinsic<[T],
+                [T, llvm_anyvector_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<LLVMType T>
+    : Intrinsic<[T],
+                [T, llvm_i32_ty, llvm_i32_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_CNT_Intrinsic
     : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
                 [LLVMVectorOfBitcastsToInt<0>,
@@ -1135,6 +1158,74 @@
 
 def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
 
+//
+// Saturating scalar arithmetic
+//
+
+def int_aarch64_sve_sqdech : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqdecw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqdecd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqdecp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_sqdecb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdech_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdech_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdecw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdecd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdecp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+def int_aarch64_sve_sqinch : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqincw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqincd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqincp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_sqincb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqinch_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqinch_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqincw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqincd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqincp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+def int_aarch64_sve_uqdech : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqdecw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqdecd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqdecp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_uqdecb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdech_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdech_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdecw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdecd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdecp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+def int_aarch64_sve_uqinch : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqincw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqincd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqincp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_uqincb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqinch_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqinch_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqincw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqincd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqincp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
 //
 // Reversal
 //
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -813,76 +813,76 @@
   defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
   defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
 
-  defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb">;
-  defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb">;
-  defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb">;
-  defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb">;
-  defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb">;
-  defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb">;
-  defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb">;
-  defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb">;
-
-  defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch">;
-  defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch">;
-  defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech">;
-  defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech">;
-  defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch">;
-  defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch">;
-  defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech">;
-  defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech">;
-
-  defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw">;
-  defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw">;
-  defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw">;
-  defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw">;
-  defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw">;
-  defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw">;
-  defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw">;
-  defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw">;
-
-  defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd">;
-  defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd">;
-  defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd">;
-  defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd">;
-  defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd">;
-  defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd">;
-  defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd">;
-  defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd">;
-
-  defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16>;
-  defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16>;
-  defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16>;
-  defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16>;
+  defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
+  defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
+  defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
+  defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb", int_aarch64_sve_uqdecb_n32>;
+  defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb", int_aarch64_sve_sqincb_n64>;
+  defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb", int_aarch64_sve_uqincb_n64>;
+  defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb", int_aarch64_sve_sqdecb_n64>;
+  defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb", int_aarch64_sve_uqdecb_n64>;
+
+  defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch", int_aarch64_sve_sqinch_n32>;
+  defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch", int_aarch64_sve_uqinch_n32>;
+  defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech", int_aarch64_sve_sqdech_n32>;
+  defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech", int_aarch64_sve_uqdech_n32>;
+  defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch", int_aarch64_sve_sqinch_n64>;
+  defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch", int_aarch64_sve_uqinch_n64>;
+  defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech", int_aarch64_sve_sqdech_n64>;
+  defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech", int_aarch64_sve_uqdech_n64>;
+
+  defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw", int_aarch64_sve_sqincw_n32>;
+  defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw", int_aarch64_sve_uqincw_n32>;
+  defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw", int_aarch64_sve_sqdecw_n32>;
+  defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw", int_aarch64_sve_uqdecw_n32>;
+  defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw", int_aarch64_sve_sqincw_n64>;
+  defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw", int_aarch64_sve_uqincw_n64>;
+  defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw", int_aarch64_sve_sqdecw_n64>;
+  defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw", int_aarch64_sve_uqdecw_n64>;
+
+  defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd", int_aarch64_sve_sqincd_n32>;
+  defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd", int_aarch64_sve_uqincd_n32>;
+  defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd", int_aarch64_sve_sqdecd_n32>;
+  defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd", int_aarch64_sve_uqdecd_n32>;
+  defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd", int_aarch64_sve_sqincd_n64>;
+  defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd", int_aarch64_sve_uqincd_n64>;
+  defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd", int_aarch64_sve_sqdecd_n64>;
+  defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd", int_aarch64_sve_uqdecd_n64>;
+
+  defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16, int_aarch64_sve_sqinch, nxv8i16>;
+  defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16, int_aarch64_sve_uqinch, nxv8i16>;
+  defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16, int_aarch64_sve_sqdech, nxv8i16>;
+  defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16, int_aarch64_sve_uqdech, nxv8i16>;
   defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>;
   defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>;
 
-  defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32>;
-  defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32>;
-  defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32>;
-  defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32>;
+  defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32, int_aarch64_sve_sqincw, nxv4i32>;
+  defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32, int_aarch64_sve_uqincw, nxv4i32>;
+  defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32, int_aarch64_sve_sqdecw, nxv4i32>;
+  defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32, int_aarch64_sve_uqdecw, nxv4i32>;
   defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>;
   defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>;
 
-  defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64>;
-  defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64>;
-  defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64>;
-  defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64>;
+  defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64, int_aarch64_sve_sqincd, nxv2i64>;
+  defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64, int_aarch64_sve_uqincd, nxv2i64>;
+  defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64, int_aarch64_sve_sqdecd, nxv2i64>;
+  defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64, int_aarch64_sve_uqdecd, nxv2i64>;
   defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>;
   defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>;
 
-  defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp">;
-  defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp">;
-  defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp">;
-  defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp">;
-  defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp">;
-  defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp">;
-  defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp">;
-  defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp">;
+  defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp", int_aarch64_sve_sqincp_n32>;
+  defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp", int_aarch64_sve_sqincp_n64>;
+  defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp", int_aarch64_sve_uqincp_n32>;
+  defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp", int_aarch64_sve_uqincp_n64>;
+  defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp", int_aarch64_sve_sqdecp_n32>;
+  defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp", int_aarch64_sve_sqdecp_n64>;
+  defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp", int_aarch64_sve_uqdecp_n32>;
+  defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp", int_aarch64_sve_uqdecp_n64>;
   defm INCP_XP : sve_int_count_r_x64<0b10000, "incp">;
   defm DECP_XP : sve_int_count_r_x64<0b10100, "decp">;
 
-  defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp">;
-  defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp">;
-  defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp">;
-  defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp">;
+  defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp", int_aarch64_sve_sqincp>;
+  defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp", int_aarch64_sve_uqincp>;
+  defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp", int_aarch64_sve_sqdecp>;
+  defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp", int_aarch64_sve_uqdecp>;
   defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
   defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -433,25 +433,55 @@
   let Constraints = "$Rdn = $_Rdn";
 }
 
-multiclass sve_int_count_r_s32<bits<5> opc, string asm> {
+multiclass sve_int_count_r_s32<bits<5> opc, string asm,
+                               SDPatternOperator op> {
   def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64as32>;
   def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64as32>;
   def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64as32>;
   def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64as32>;
+
+  def : Pat<(i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))),
+            (EXTRACT_SUBREG (!cast<Instruction>(NAME # _B) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+  def : Pat<(i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))),
+            (EXTRACT_SUBREG (!cast<Instruction>(NAME # _H) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+  def : Pat<(i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))),
+            (EXTRACT_SUBREG (!cast<Instruction>(NAME # _S) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+  def : Pat<(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))),
+            (EXTRACT_SUBREG (!cast<Instruction>(NAME # _D) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
 }
 
-multiclass sve_int_count_r_u32<bits<5> opc, string asm> {
+multiclass sve_int_count_r_u32<bits<5> opc, string asm,
+                               SDPatternOperator op> {
   def _B : sve_int_count_r<0b00, opc, asm, GPR32z, PPR8, GPR32z>;
   def _H : sve_int_count_r<0b01, opc, asm, GPR32z, PPR16, GPR32z>;
   def _S : sve_int_count_r<0b10, opc, asm, GPR32z, PPR32, GPR32z>;
   def _D : sve_int_count_r<0b11, opc, asm, GPR32z, PPR64, GPR32z>;
+
+  def : Pat<(i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _B) PPRAny:$Pg, $Rn)>;
+  def : Pat<(i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _H) PPRAny:$Pg, $Rn)>;
+  def : Pat<(i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _S) PPRAny:$Pg, $Rn)>;
+  def : Pat<(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
 }
 
-multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
+multiclass sve_int_count_r_x64<bits<5> opc, string asm,
+                               SDPatternOperator op = null_frag> {
   def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>;
   def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>;
   def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>;
   def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64z>;
+
+  def : Pat<(i64 (op GPR64:$Rn, (nxv16i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _B) PPRAny:$Pg, $Rn)>;
+  def : Pat<(i64 (op GPR64:$Rn, (nxv8i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _H) PPRAny:$Pg, $Rn)>;
+  def : Pat<(i64 (op GPR64:$Rn, (nxv4i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _S) PPRAny:$Pg, $Rn)>;
+  def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))),
+            (!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
 }
 
 class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
@@ -476,11 +506,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_int_count_v<bits<5> opc, string asm> {
+multiclass sve_int_count_v<bits<5> opc, string asm,
+                           SDPatternOperator op = null_frag> {
   def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
   def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
   def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
 
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, !cast<Instruction>(NAME # _D)>;
+
   def : InstAlias<asm # "\t$Zdn, $Pm",
                   (!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
   def : InstAlias<asm # "\t$Zdn, $Pm",
@@ ... @@
-multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> {
+multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty,
+                            SDPatternOperator op = null_frag,
+                            ValueType vt = OtherVT> {
   def NAME : sve_int_countvlv<opc, asm, zprty>;
 
   def : InstAlias<asm # "\t$Zdn, $pattern",
                   (!cast<Instruction>(NAME) zprty:$Zdn, sve_pred_enum:$pattern, 1), 1>;
   def : InstAlias<asm # "\t$Zdn",
                   (!cast<Instruction>(NAME) zprty:$Zdn, 0b11111, 1), 2>;
+
+  def : Pat<(vt (op (vt zprty:$Zn), (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+            (!cast<Instruction>(NAME) $Zn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
 }
 
 class sve_int_pred_pattern_a<bits<3> opc, string asm>
@@ -644,31 +684,46 @@
   let Constraints = "$Rdn = $_Rdn";
 }
 
-multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm> {
+multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm,
+                                      SDPatternOperator op> {
   def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64as32>;
 
   def : InstAlias<asm # "\t$Rd, $Rn, $pattern",
                   (!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, sve_pred_enum:$pattern, 1), 1>;
   def : InstAlias<asm # "\t$Rd, $Rn",
                   (!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, 0b11111, 1), 2>;
+
+  // NOTE: Register allocation doesn't like tied operands of differing register
+  // class, hence the extra INSERT_SUBREG complication.
+
+  def : Pat<(i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+            (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32), sve_pred_enum:$pattern, sve_incdec_imm:$imm4), sub_32)>;
 }
 
-multiclass sve_int_pred_pattern_b_u32<bits<5> opc, string asm> {
+multiclass sve_int_pred_pattern_b_u32<bits<5> opc, string asm,
+                                      SDPatternOperator op> {
   def NAME : sve_int_pred_pattern_b<opc, asm, GPR32z, GPR32z>;
 
   def : InstAlias<asm # "\t$Rdn, $pattern",
                   (!cast<Instruction>(NAME) GPR32z:$Rdn, sve_pred_enum:$pattern, 1), 1>;
   def : InstAlias<asm # "\t$Rdn",
                   (!cast<Instruction>(NAME) GPR32z:$Rdn, 0b11111, 1), 2>;
+
+  def : Pat<(i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+            (!cast<Instruction>(NAME) $Rn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
 }
 
-multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm> {
+multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm,
+                                      SDPatternOperator op> {
   def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64z>;
 
   def : InstAlias<asm # "\t$Rdn, $pattern",
                   (!cast<Instruction>(NAME) GPR64z:$Rdn, sve_pred_enum:$pattern, 1), 1>;
   def : InstAlias<asm # "\t$Rdn",
                   (!cast<Instruction>(NAME) GPR64z:$Rdn, 0b11111, 1), 2>;
+
+  def : Pat<(i64 (op GPR64:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+            (!cast<Instruction>(NAME) $Rn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-saturating-scalar-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-saturating-scalar-arith.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-saturating-scalar-arith.ll
@@ -0,0 +1,1013 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+;
+; SQDECH (vector)
+;
+
+define <vscale x 8 x i16> @sqdech(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqdech:
+; CHECK: sqdech z0.h, pow2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdech.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  i32 0, i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+;
+; SQDECW (vector)
+;
+
+define <vscale x 4 x i32> @sqdecw(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqdecw:
+; CHECK: sqdecw z0.s, vl1, mul #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecw.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  i32 1, i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQDECD (vector)
+;
+
+define <vscale x 2 x i64> @sqdecd(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqdecd:
+; CHECK: sqdecd z0.d, vl2, mul #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecd.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  i32 2, i32 3)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQDECP (vector)
+;
+
+define <vscale x 8 x i16> @sqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sqdecp_b16:
+; CHECK: sqdecp z0.h, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdecp.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i1> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sqdecp_b32:
+; CHECK: sqdecp z0.s, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecp.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i1> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sqdecp_b64:
+; CHECK: sqdecp z0.d, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecp.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i1> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQDECB (scalar)
+;
+
+define i32 @sqdecb_n32(i32 %a) {
+; CHECK-LABEL: sqdecb_n32:
+; CHECK: sqdecb x0, w0, vl3, mul #4
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
+  ret i32 %out
+}
+
+define i64 @sqdecb_n64(i64 %a) {
+; CHECK-LABEL: sqdecb_n64:
+; CHECK: sqdecb x0, vl4, mul #5
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecb.n64(i64 %a, i32 4, i32 5)
+  ret i64 %out
+}
+
+;
+; SQDECH (scalar)
+;
+
+define i32 @sqdech_n32(i32 %a) {
+; CHECK-LABEL: sqdech_n32:
+; CHECK: sqdech x0, w0, vl5, mul #6
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 5, i32 6)
+  ret i32 %out
+}
+
+define i64 @sqdech_n64(i64 %a) {
+; CHECK-LABEL: sqdech_n64:
+; CHECK: sqdech x0, vl6, mul #7
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdech.n64(i64 %a, i32 6, i32 7)
+  ret i64 %out
+}
+
+;
+; SQDECW (scalar)
+;
+
+define i32 @sqdecw_n32(i32 %a) {
+; CHECK-LABEL: sqdecw_n32:
+; CHECK: sqdecw x0, w0, vl7, mul #8
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 7, i32 8)
+  ret i32 %out
+}
+
+define i64 @sqdecw_n64(i64 %a) {
+; CHECK-LABEL: sqdecw_n64:
+; CHECK: sqdecw x0, vl8, mul #9
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecw.n64(i64 %a, i32 8, i32 9)
+  ret i64 %out
+}
+
+;
+; SQDECD (scalar)
+;
+
+define i32 @sqdecd_n32(i32 %a) {
+; CHECK-LABEL: sqdecd_n32:
+; CHECK: sqdecd x0, w0, vl16, mul #10
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 9, i32 10)
+  ret i32 %out
+}
+
+define i64 @sqdecd_n64(i64 %a) {
+; CHECK-LABEL: sqdecd_n64:
+; CHECK: sqdecd x0, vl32, mul #11
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecd.n64(i64 %a, i32 10, i32 11)
+  ret i64 %out
+}
+
+;
+; SQDECP (scalar)
+;
+
+define i32 @sqdecp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: sqdecp_n32_b8:
+; CHECK: sqdecp x0, p0.b, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @sqdecp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sqdecp_n32_b16:
+; CHECK: sqdecp x0, p0.h, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @sqdecp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sqdecp_n32_b32:
+; CHECK: sqdecp x0, p0.s, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @sqdecp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sqdecp_n32_b64:
+; CHECK: sqdecp x0, p0.d, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
+  ret i32 %out
+}
+
+define i64 @sqdecp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: sqdecp_n64_b8:
+; CHECK: sqdecp x0, p0.b
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @sqdecp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sqdecp_n64_b16:
+; CHECK: sqdecp x0, p0.h
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @sqdecp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sqdecp_n64_b32:
+; CHECK: sqdecp x0, p0.s
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @sqdecp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sqdecp_n64_b64:
+; CHECK: sqdecp x0, p0.d
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
+  ret i64 %out
+}
+
+;
+; SQINCH (vector)
+;
+
+define <vscale x 8 x i16> @sqinch(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqinch:
+; CHECK: sqinch z0.h, pow2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqinch.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  i32 0, i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+;
+; SQINCW (vector)
+;
+
+define <vscale x 4 x i32> @sqincw(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqincw:
+; CHECK: sqincw z0.s, vl1, mul #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqincw.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  i32 1, i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; SQINCD (vector)
+;
+
+define <vscale x 2 x i64> @sqincd(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqincd:
+; CHECK: sqincd z0.d, vl2, mul #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqincd.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  i32 2, i32 3)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQINCP (vector)
+;
+
+define <vscale x 8 x i16> @sqincp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sqincp_b16:
+; CHECK: sqincp z0.h, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqincp.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i1> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqincp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sqincp_b32:
+; CHECK: sqincp z0.s, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqincp.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i1> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sqincp_b64:
+; CHECK: sqincp z0.d, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqincp.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i1> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQINCB (scalar)
+;
+
+define i32 @sqincb_n32(i32 %a) {
+; CHECK-LABEL: sqincb_n32:
+; CHECK: sqincb x0, w0, vl3, mul #4
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
+  ret i32 %out
+}
+
+define i64 @sqincb_n64(i64 %a) {
+; CHECK-LABEL: sqincb_n64:
+; CHECK: sqincb x0, vl4, mul #5
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincb.n64(i64 %a, i32 4, i32 5)
+  ret i64 %out
+}
+
+;
+; SQINCH (scalar)
+;
+
+define i32 @sqinch_n32(i32 %a) {
+; CHECK-LABEL: sqinch_n32:
+; CHECK: sqinch x0, w0, vl5, mul #6
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 5, i32 6)
+  ret i32 %out
+}
+
+define i64 @sqinch_n64(i64 %a) {
+; CHECK-LABEL: sqinch_n64:
+; CHECK: sqinch x0, vl6, mul #7
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqinch.n64(i64 %a, i32 6, i32 7)
+  ret i64 %out
+}
+
+;
+; SQINCW (scalar)
+;
+
+define i32 @sqincw_n32(i32 %a) {
+; CHECK-LABEL: sqincw_n32:
+; CHECK: sqincw x0, w0, vl7, mul #8
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 7, i32 8)
+  ret i32 %out
+}
+
+define i64 @sqincw_n64(i64 %a) {
+; CHECK-LABEL: sqincw_n64:
+; CHECK: sqincw x0, vl8, mul #9
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincw.n64(i64 %a, i32 8, i32 9)
+  ret i64 %out
+}
+
+;
+; SQINCD (scalar)
+;
+
+define i32 @sqincd_n32(i32 %a) {
+; CHECK-LABEL: sqincd_n32:
+; CHECK: sqincd x0, w0, vl16, mul #10
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 9, i32 10)
+  ret i32 %out
+}
+
+define i64 @sqincd_n64(i64 %a) {
+; CHECK-LABEL: sqincd_n64:
+; CHECK: sqincd x0, vl32, mul #11
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincd.n64(i64 %a, i32 10, i32 11)
+  ret i64 %out
+}
+
+;
+; SQINCP (scalar)
+;
+
+define i32 @sqincp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: sqincp_n32_b8:
+; CHECK: sqincp x0, p0.b, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @sqincp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sqincp_n32_b16:
+; CHECK: sqincp x0, p0.h, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @sqincp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sqincp_n32_b32:
+; CHECK: sqincp x0, p0.s, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @sqincp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sqincp_n32_b64:
+; CHECK: sqincp x0, p0.d, w0
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
+  ret i32 %out
+}
+
+define i64 @sqincp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: sqincp_n64_b8:
+; CHECK: sqincp x0, p0.b
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @sqincp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sqincp_n64_b16:
+; CHECK: sqincp x0, p0.h
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @sqincp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sqincp_n64_b32:
+; CHECK: sqincp x0, p0.s
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @sqincp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sqincp_n64_b64:
+; CHECK: sqincp x0, p0.d
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
+  ret i64 %out
+}
+
+;
+; UQDECH (vector)
+;
+
+define <vscale x 8 x i16> @uqdech(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqdech:
+; CHECK: uqdech z0.h, pow2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  i32 0, i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+;
+; UQDECW (vector)
+;
+
+define <vscale x 4 x i32> @uqdecw(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqdecw:
+; CHECK: uqdecw z0.s, vl1, mul #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  i32 1, i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQDECD (vector)
+;
+
+define <vscale x 2 x i64> @uqdecd(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqdecd:
+; CHECK: uqdecd z0.d, vl2, mul #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  i32 2, i32 3)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UQDECP (vector)
+;
+
+define <vscale x 8 x i16> @uqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uqdecp_b16:
+; CHECK: uqdecp z0.h, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqdecp.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i1> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uqdecp_b32:
+; CHECK: uqdecp z0.s, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecp.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i1> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uqdecp_b64:
+; CHECK: uqdecp z0.d, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecp.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i1> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UQDECB (scalar)
+;
+
+define i32 @uqdecb_n32(i32 %a) {
+; CHECK-LABEL: uqdecb_n32:
+; CHECK: uqdecb w0, vl3, mul #4
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecb.n32(i32 %a, i32 3, i32 4)
+  ret i32 %out
+}
+
+define i64 @uqdecb_n64(i64 %a) {
+; CHECK-LABEL: uqdecb_n64:
+; CHECK: uqdecb x0, vl4, mul #5
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecb.n64(i64 %a, i32 4, i32 5)
+  ret i64 %out
+}
+
+;
+; UQDECH (scalar)
+;
+
+define i32 @uqdech_n32(i32 %a) {
+; CHECK-LABEL: uqdech_n32:
+; CHECK: uqdech w0, vl5, mul #6
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdech.n32(i32 %a, i32 5, i32 6)
+  ret i32 %out
+}
+
+define i64 @uqdech_n64(i64 %a) {
+; CHECK-LABEL: uqdech_n64:
+; CHECK: uqdech x0, vl6, mul #7
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdech.n64(i64 %a, i32 6, i32 7)
+  ret i64 %out
+}
+
+;
+; UQDECW (scalar)
+;
+
+define i32 @uqdecw_n32(i32 %a) {
+; CHECK-LABEL: uqdecw_n32:
+; CHECK: uqdecw w0, vl7, mul #8
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecw.n32(i32 %a, i32 7, i32 8)
+  ret i32 %out
+}
+
+define i64 @uqdecw_n64(i64 %a) {
+; CHECK-LABEL: uqdecw_n64:
+; CHECK: uqdecw x0, vl8, mul #9
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecw.n64(i64 %a, i32 8, i32 9)
+  ret i64 %out
+}
+
+;
+; UQDECD (scalar)
+;
+
+define i32 @uqdecd_n32(i32 %a) {
+; CHECK-LABEL: uqdecd_n32:
+; CHECK: uqdecd w0, vl16, mul #10
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecd.n32(i32 %a, i32 9, i32 10)
+  ret i32 %out
+}
+
+define i64 @uqdecd_n64(i64 %a) {
+; CHECK-LABEL: uqdecd_n64:
+; CHECK: uqdecd x0, vl32, mul #11
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecd.n64(i64 %a, i32 10, i32 11)
+  ret i64 %out
+}
+
+;
+; UQDECP (scalar)
+;
+
+define i32 @uqdecp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uqdecp_n32_b8:
+; CHECK: uqdecp w0, p0.b
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @uqdecp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uqdecp_n32_b16:
+; CHECK: uqdecp w0, p0.h
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @uqdecp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uqdecp_n32_b32:
+; CHECK: uqdecp w0, p0.s
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @uqdecp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uqdecp_n32_b64:
+; CHECK: uqdecp w0, p0.d
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
+  ret i32 %out
+}
+
+define i64 @uqdecp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uqdecp_n64_b8:
+; CHECK: uqdecp x0, p0.b
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @uqdecp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uqdecp_n64_b16:
+; CHECK: uqdecp x0, p0.h
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @uqdecp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uqdecp_n64_b32:
+; CHECK: uqdecp x0, p0.s
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @uqdecp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uqdecp_n64_b64:
+; CHECK: uqdecp x0, p0.d
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
+  ret i64 %out
+}
+
+;
+; UQINCH (vector)
+;
+
+define <vscale x 8 x i16> @uqinch(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqinch:
+; CHECK: uqinch z0.h, pow2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqinch.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  i32 0, i32 1)
+  ret <vscale x 8 x i16> %out
+}
+
+;
+; UQINCW (vector)
+;
+
+define <vscale x 4 x i32> @uqincw(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqincw:
+; CHECK: uqincw z0.s, vl1, mul #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqincw.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  i32 1, i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; UQINCD (vector)
+;
+
+define <vscale x 2 x i64> @uqincd(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqincd:
+; CHECK: uqincd z0.d, vl2, mul #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqincd.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  i32 2, i32 3)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UQINCP (vector)
+;
+
+define <vscale x 8 x i16> @uqincp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uqincp_b16:
+; CHECK: uqincp z0.h, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqincp.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i1> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqincp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uqincp_b32:
+; CHECK: uqincp z0.s, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqincp.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i1> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uqincp_b64:
+; CHECK: uqincp z0.d, p0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqincp.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i1> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UQINCB (scalar)
+;
+
+define i32 @uqincb_n32(i32 %a) {
+; CHECK-LABEL: uqincb_n32:
+; CHECK: uqincb w0, vl3, mul #4
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincb.n32(i32 %a, i32 3, i32 4)
+  ret i32 %out
+}
+
+define i64 @uqincb_n64(i64 %a) {
+; CHECK-LABEL: uqincb_n64:
+; CHECK: uqincb x0, vl4, mul #5
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincb.n64(i64 %a, i32 4, i32 5)
+  ret i64 %out
+}
+
+;
+; UQINCH (scalar)
+;
+
+define i32 @uqinch_n32(i32 %a) {
+; CHECK-LABEL: uqinch_n32:
+; CHECK: uqinch w0, vl5, mul #6
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqinch.n32(i32 %a, i32 5, i32 6)
+  ret i32 %out
+}
+
+define i64 @uqinch_n64(i64 %a) {
+; CHECK-LABEL: uqinch_n64:
+; CHECK: uqinch x0, vl6, mul #7
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqinch.n64(i64 %a, i32 6, i32 7)
+  ret i64 %out
+}
+
+;
+; UQINCW (scalar)
+;
+
+define i32 @uqincw_n32(i32 %a) {
+; CHECK-LABEL: uqincw_n32:
+; CHECK: uqincw w0, vl7, mul #8
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincw.n32(i32 %a, i32 7, i32 8)
+  ret i32 %out
+}
+
+define i64 @uqincw_n64(i64 %a) {
+; CHECK-LABEL: uqincw_n64:
+; CHECK: uqincw x0, vl8, mul #9
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincw.n64(i64 %a, i32 8, i32 9)
+  ret i64 %out
+}
+
+;
+; UQINCD (scalar)
+;
+
+define i32 @uqincd_n32(i32 %a) {
+; CHECK-LABEL: uqincd_n32:
+; CHECK: uqincd w0, vl16, mul #10
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincd.n32(i32 %a, i32 9, i32 10)
+  ret i32 %out
+}
+
+define i64 @uqincd_n64(i64 %a) {
+; CHECK-LABEL: uqincd_n64:
+; CHECK: uqincd x0, vl32, mul #11
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincd.n64(i64 %a, i32 10, i32 11)
+  ret i64 %out
+}
+
+;
+; UQINCP (scalar)
+;
+
+define i32 @uqincp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uqincp_n32_b8:
+; CHECK: uqincp w0, p0.b
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @uqincp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uqincp_n32_b16:
+; CHECK: uqincp w0, p0.h
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @uqincp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uqincp_n32_b32:
+; CHECK: uqincp w0, p0.s
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
+  ret i32 %out
+}
+
+define i32 @uqincp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uqincp_n32_b64:
+; CHECK: uqincp w0, p0.d
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
+  ret i32 %out
+}
+
+define i64 @uqincp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uqincp_n64_b8:
+; CHECK: uqincp x0, p0.b
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @uqincp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uqincp_n64_b16:
+; CHECK: uqincp x0, p0.h
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @uqincp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uqincp_n64_b32:
+; CHECK: uqincp x0, p0.s
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
+  ret i64 %out
+}
+
+define i64 @uqincp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uqincp_n64_b64:
+; CHECK: uqincp x0, p0.d
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
+  ret i64 %out
+}
+
+;
+; SQDEC
+;
+
+; sqdec{h|w|d}(vector, pattern, multiplier)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdech.nxv8i16(<vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdecw.nxv4i32(<vscale x 4 x i32>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdecd.nxv2i64(<vscale x 2 x i64>, i32, i32)
+
+; sqdec{b|h|w|d}(scalar, pattern, multiplier)
+declare i32 @llvm.aarch64.sve.sqdecb.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqdecb.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.sqdech.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqdech.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.sqdecw.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqdecw.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.sqdecd.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqdecd.n64(i64, i32, i32)
+
+; sqdecp(scalar, predicate)
+declare i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32, <vscale x 16 x i1>)
+declare i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32, <vscale x 8 x i1>)
+declare i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32, <vscale x 4 x i1>)
+declare i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32, <vscale x 2 x i1>)
+
+declare i64 @llvm.aarch64.sve.sqdecp.n64.nxv16i1(i64, <vscale x 16 x i1>)
+declare i64 @llvm.aarch64.sve.sqdecp.n64.nxv8i1(i64, <vscale x 8 x i1>)
+declare i64 @llvm.aarch64.sve.sqdecp.n64.nxv4i1(i64, <vscale x 4 x i1>)
+declare i64 @llvm.aarch64.sve.sqdecp.n64.nxv2i1(i64, <vscale x 2 x i1>)
+
+; sqdecp(vector, predicate)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdecp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdecp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdecp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>)
+
+;
+; SQINC
+;
+
+; sqinc{h|w|d}(vector, pattern, multiplier)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqinch.nxv8i16(<vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqincw.nxv4i32(<vscale x 4 x i32>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqincd.nxv2i64(<vscale x 2 x i64>, i32, i32)
+
+; sqinc{b|h|w|d}(scalar, pattern, multiplier)
+declare i32 @llvm.aarch64.sve.sqincb.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqincb.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.sqinch.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqinch.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.sqincw.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqincw.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.sqincd.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.sqincd.n64(i64, i32, i32)
+
+; sqincp(scalar, predicate)
+declare i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32, <vscale x 16 x i1>)
+declare i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32, <vscale x 8 x i1>)
+declare i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32, <vscale x 4 x i1>)
+declare i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32, <vscale x 2 x i1>)
+
+declare i64 @llvm.aarch64.sve.sqincp.n64.nxv16i1(i64, <vscale x 16 x i1>)
+declare i64 @llvm.aarch64.sve.sqincp.n64.nxv8i1(i64, <vscale x 8 x i1>)
+declare i64 @llvm.aarch64.sve.sqincp.n64.nxv4i1(i64, <vscale x 4 x i1>)
+declare i64 @llvm.aarch64.sve.sqincp.n64.nxv2i1(i64, <vscale x 2 x i1>)
+
+; sqincp(vector, predicate)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqincp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqincp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqincp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>)
+
+;
+; UQDEC
+;
+
+; uqdec{h|w|d}(vector, pattern, multiplier)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64>, i32, i32)
+
+; uqdec{b|h|w|d}(scalar, pattern, multiplier)
+declare i32 @llvm.aarch64.sve.uqdecb.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqdecb.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.uqdech.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqdech.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.uqdecw.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqdecw.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.uqdecd.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqdecd.n64(i64, i32, i32)
+
+; uqdecp(scalar, predicate)
+declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv16i1(i32, <vscale x 16 x i1>)
+declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv8i1(i32, <vscale x 8 x i1>)
+declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv4i1(i32, <vscale x 4 x i1>)
+declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv2i1(i32, <vscale x 2 x i1>)
+
+declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv16i1(i64, <vscale x 16 x i1>)
+declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv8i1(i64, <vscale x 8 x i1>)
+declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv4i1(i64, <vscale x 4 x i1>)
+declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv2i1(i64, <vscale x 2 x i1>)
+
+; uqdecp(vector, predicate)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqdecp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqdecp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqdecp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>)
+
+;
+; UQINC
+;
+
+; uqinc{h|w|d}(vector, pattern, multiplier)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqinch.nxv8i16(<vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqincw.nxv4i32(<vscale x 4 x i32>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqincd.nxv2i64(<vscale x 2 x i64>, i32, i32)
+
+; uqinc{b|h|w|d}(scalar, pattern, multiplier)
+declare i32 @llvm.aarch64.sve.uqincb.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqincb.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.uqinch.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqinch.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.uqincw.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqincw.n64(i64, i32, i32)
+declare i32 @llvm.aarch64.sve.uqincd.n32(i32, i32, i32)
+declare i64 @llvm.aarch64.sve.uqincd.n64(i64, i32, i32)
+
+; uqincp(scalar, predicate)
+declare i32 @llvm.aarch64.sve.uqincp.n32.nxv16i1(i32, <vscale x 16 x i1>)
+declare i32 @llvm.aarch64.sve.uqincp.n32.nxv8i1(i32, <vscale x 8 x i1>)
+declare i32 @llvm.aarch64.sve.uqincp.n32.nxv4i1(i32, <vscale x 4 x i1>)
+declare i32 @llvm.aarch64.sve.uqincp.n32.nxv2i1(i32, <vscale x 2 x i1>)
+
+declare i64 @llvm.aarch64.sve.uqincp.n64.nxv16i1(i64, <vscale x 16 x i1>)
+declare i64 @llvm.aarch64.sve.uqincp.n64.nxv8i1(i64, <vscale x 8 x i1>)
+declare i64 @llvm.aarch64.sve.uqincp.n64.nxv4i1(i64, <vscale x 4 x i1>)
+declare i64 @llvm.aarch64.sve.uqincp.n64.nxv2i1(i64, <vscale x 2 x i1>)
+
+; uqincp(vector, predicate)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqincp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqincp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqincp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>)