Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -830,7 +830,27 @@
 def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;

-def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_or : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_xor : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_bic_pred : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic;

 def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -34,10 +34,10 @@
   defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", int_aarch64_sve_sub>;
   defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", int_aarch64_sve_subr>;

-  defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", null_frag>;
-  defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", null_frag>;
-  defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", null_frag>;
-  defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", null_frag>;
+  defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_or>;
+  defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_xor>;
+  defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
+  defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic_pred>;

   defm ADD_ZI : sve_int_arith_imm0<0b000, "add">;
   defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">;
@@ -73,14 +73,14 @@
   defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;

   defm MUL_ZI : sve_int_arith_imm2<"mul">;
-  defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", null_frag>;
-  defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", null_frag>;
-  defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", null_frag>;
+  defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
+  defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
+  defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;

-  defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", null_frag>;
-  defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", null_frag>;
-  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", null_frag>;
-  defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", null_frag>;
+  defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>;
+  defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>;
+  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
+  defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;

   defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
   defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
@@ -105,12 +105,12 @@
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;

-  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", null_frag>;
-  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", null_frag>;
-  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", null_frag>;
-  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", null_frag>;
-  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", null_frag>;
-  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", null_frag>;
+  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>;
+  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>;
+  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", int_aarch64_sve_smin>;
+  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", int_aarch64_sve_umin>;
+  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
+  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;

   defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;
Index: llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll
+++ llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll
@@ -40,9 +40,6 @@
   ret <vscale x 2 x i64> %out
 }

-
-
-
 define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sub_i8:
 ; CHECK: sub z0.b, p0/m, z0.b, z1.b
@@ -83,8 +80,6 @@
   ret <vscale x 2 x i64> %out
 }

-
-
 define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: subr_i8:
 ; CHECK: subr z0.b, p0/m, z0.b, z1.b
@@ -125,7 +120,245 @@
   ret <vscale x 2 x i64> %out
 }

+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smax_i8:
+; CHECK: smax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smax_i16:
+; CHECK: smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smax_i32:
+; CHECK: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smax_i64:
+; CHECK: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umax_i8:
+; CHECK: umax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umax_i16:
+; CHECK: umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umax_i32:
+; CHECK: umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umax_i64:
+; CHECK: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smin_i8:
+; CHECK: smin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smin_i16:
+; CHECK: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smin_i32:
+; CHECK: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smin_i64:
+; CHECK: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umin_i8:
+; CHECK: umin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umin_i16:
+; CHECK: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umin_i32:
+; CHECK: umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umin_i64:
+; CHECK: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @sabd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sabd_i8:
+; CHECK: sabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sabd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sabd_i16:
+; CHECK: sabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sabd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sabd_i32:
+; CHECK: sabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sabd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sabd_i64:
+; CHECK: sabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @uabd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uabd_i8:
+; CHECK: uabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uabd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uabd_i16:
+; CHECK: uabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uabd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uabd_i32:
+; CHECK: uabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uabd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uabd_i64:
+; CHECK: uabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}

 declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -141,3 +374,33 @@
 declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
Index: llvm/test/CodeGen/AArch64/sve-int-div-pred.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-int-div-pred.ll
@@ -0,0 +1,91 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sdiv_i32:
+; CHECK: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sdiv_i64:
+; CHECK: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: udiv_i32:
+; CHECK: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: udiv_i64:
+; CHECK: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @sdivr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sdivr_i32:
+; CHECK: sdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdivr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sdivr_i64:
+; CHECK: sdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @udivr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: udivr_i32:
+; CHECK: udivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @udivr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: udivr_i64:
+; CHECK: udivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
Index: llvm/test/CodeGen/AArch64/sve-int-log-pred.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-int-log-pred.ll
@@ -0,0 +1,140 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i8> @and_pred_i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: and_pred_i8:
+; CHECK: and z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i8> @llvm.aarch64.sve.and.nxv2i8(<vscale x 2 x i1> %pg,
+                                                             <vscale x 2 x i8> %a,
+                                                             <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %out
+}
+
+define <vscale x 2 x i16> @and_pred_i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: and_pred_i16:
+; CHECK: and z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i16> @llvm.aarch64.sve.and.nxv2i16(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i16> %a,
+                                                               <vscale x 2 x i16> %b)
+  ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 2 x i32> @and_pred_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: and_pred_i32:
+; CHECK: and z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i32> @llvm.aarch64.sve.and.nxv2i32(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i32> %a,
+                                                               <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 2 x i64> @and_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: and_pred_i64:
+; CHECK: and z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i8> @or_pred_i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: or_pred_i8:
+; CHECK: orr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i8> @llvm.aarch64.sve.or.nxv2i8(<vscale x 2 x i1> %pg,
+                                                            <vscale x 2 x i8> %a,
+                                                            <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %out
+}
+
+define <vscale x 2 x i16> @or_pred_i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: or_pred_i16:
+; CHECK: orr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i16> @llvm.aarch64.sve.or.nxv2i16(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x i16> %a,
+                                                              <vscale x 2 x i16> %b)
+  ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 2 x i32> @or_pred_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: or_pred_i32:
+; CHECK: orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i32> @llvm.aarch64.sve.or.nxv2i32(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x i32> %a,
+                                                              <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 2 x i64> @or_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: or_pred_i64:
+; CHECK: orr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.or.nxv2i64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x i64> %a,
+                                                              <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i8> @xor_pred_i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: xor_pred_i8:
+; CHECK: eor z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i8> @llvm.aarch64.sve.xor.nxv2i8(<vscale x 2 x i1> %pg,
+                                                             <vscale x 2 x i8> %a,
+                                                             <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %out
+}
+
+define <vscale x 2 x i16> @xor_pred_i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: xor_pred_i16:
+; CHECK: eor z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i16> @llvm.aarch64.sve.xor.nxv2i16(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i16> %a,
+                                                               <vscale x 2 x i16> %b)
+  ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 2 x i32> @xor_pred_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: xor_pred_i32:
+; CHECK: eor z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i32> @llvm.aarch64.sve.xor.nxv2i32(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i32> %a,
+                                                               <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 2 x i64> @xor_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: xor_pred_i64:
+; CHECK: eor z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.xor.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 2 x i8> @llvm.aarch64.sve.and.nxv2i8(<vscale x 2 x i1>,<vscale x 2 x i8>,<vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.aarch64.sve.and.nxv2i16(<vscale x 2 x i1>,<vscale x 2 x i16>,<vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.aarch64.sve.and.nxv2i32(<vscale x 2 x i1>,<vscale x 2 x i32>,<vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 2 x i8> @llvm.aarch64.sve.or.nxv2i8(<vscale x 2 x i1>,<vscale x 2 x i8>,<vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.aarch64.sve.or.nxv2i16(<vscale x 2 x i1>,<vscale x 2 x i16>,<vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.aarch64.sve.or.nxv2i32(<vscale x 2 x i1>,<vscale x 2 x i32>,<vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.or.nxv2i64(<vscale x 2 x i1>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 2 x i8> @llvm.aarch64.sve.xor.nxv2i8(<vscale x 2 x i1>,<vscale x 2 x i8>,<vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.aarch64.sve.xor.nxv2i16(<vscale x 2 x i1>,<vscale x 2 x i16>,<vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.aarch64.sve.xor.nxv2i32(<vscale x 2 x i1>,<vscale x 2 x i32>,<vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.xor.nxv2i64(<vscale x 2 x i1>,<vscale x 2 x i64>,<vscale x 2 x i64>)
Index: llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
@@ -0,0 +1,134 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: mul_i8:
+; CHECK: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: mul_i16:
+; CHECK: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: mul_i32:
+; CHECK: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: mul_i64:
+; CHECK: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smulh_i8:
+; CHECK: smulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smulh_i16:
+; CHECK: smulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smulh_i32:
+; CHECK: smulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smulh_i64:
+; CHECK: smulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umulh_i8:
+; CHECK: umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umulh_i16:
+; CHECK: umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umulh_i32:
+; CHECK: umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umulh_i64:
+; CHECK: umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)