diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -821,6 +821,11 @@
               [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>],
               [IntrNoMem]>;

+class AdvSIMD_Pred3VectorArg_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+              [IntrNoMem]>;
+
 //
 // Integer arithmetic
@@ -830,7 +835,32 @@
 def int_aarch64_sve_sub      : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_subr     : AdvSIMD_Pred2VectorArg_Intrinsic;

-def int_aarch64_sve_bic      : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_and      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_or       : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_xor      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_bic      : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_bic_pred : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_mul      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smulh    : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umulh    : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_sdiv     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udiv     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sdivr    : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udivr    : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_smax     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umax     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smin     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umin     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sabd     : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uabd     : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_mad      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_msb      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_mla      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_mls      : AdvSIMD_Pred3VectorArg_Intrinsic;

 def int_aarch64_sve_abs      : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_neg      : AdvSIMD_Merged1VectorArg_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -34,10 +34,10 @@
   defm SUB_ZPmZ  : sve_int_bin_pred_arit_0<0b001, "sub",  int_aarch64_sve_sub>;
   defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", int_aarch64_sve_subr>;

-  defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", null_frag>;
-  defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", null_frag>;
-  defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", null_frag>;
-  defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", null_frag>;
+  defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_or>;
+  defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_xor>;
+  defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
+  defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic_pred>;

   defm ADD_ZI : sve_int_arith_imm0<0b000, "add">;
   defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">;
@@ -47,10 +47,10 @@
   defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">;
   defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">;

-  defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">;
-  defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">;
-  defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">;
-  defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">;
+  defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
+  defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
+  defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla>;
+  defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>;

   // SVE predicated integer reductions.
   defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">;
@@ -73,14 +73,14 @@
   defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;
   defm MUL_ZI  : sve_int_arith_imm2<"mul">;

-  defm MUL_ZPmZ   : sve_int_bin_pred_arit_2<0b000, "mul",   null_frag>;
-  defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", null_frag>;
-  defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", null_frag>;
+  defm MUL_ZPmZ   : sve_int_bin_pred_arit_2<0b000, "mul",   int_aarch64_sve_mul>;
+  defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
+  defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;

-  defm SDIV_ZPmZ  : sve_int_bin_pred_arit_2_div<0b100, "sdiv",  null_frag>;
-  defm UDIV_ZPmZ  : sve_int_bin_pred_arit_2_div<0b101, "udiv",  null_frag>;
-  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", null_frag>;
-  defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", null_frag>;
+  defm SDIV_ZPmZ  : sve_int_bin_pred_arit_2_div<0b100, "sdiv",  int_aarch64_sve_sdiv>;
+  defm UDIV_ZPmZ  : sve_int_bin_pred_arit_2_div<0b101, "udiv",  int_aarch64_sve_udiv>;
+  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
+  defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;

   defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
   defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
@@ -105,12 +105,12 @@
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;

-  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", null_frag>;
-  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", null_frag>;
-  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", null_frag>;
-  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", null_frag>;
-  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", null_frag>;
-  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", null_frag>;
+  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>;
+  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>;
+  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", int_aarch64_sve_smin>;
+  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", int_aarch64_sve_umin>;
+  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
+  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;

   defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe">;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -298,6 +298,11 @@
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
       (inst $Op1, $Op2, $Op3)>;

+class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                   ValueType vt2, ValueType vt3, ValueType vt4, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
+      (inst $Op1, $Op2, $Op3, $Op4)>;
+
 def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;

 //===----------------------------------------------------------------------===//
@@ -1897,11 +1902,16 @@
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> {
+multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>;
   def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>;
   def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>;
   def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>;
+
+  def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

 class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
@@ -1929,11 +1939,16 @@
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
   def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
   def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
   def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>;
+
+  def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll
--- a/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll
@@ -40,9 +40,6 @@
   ret <vscale x 2 x i64> %out
 }

-
-
-
 define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sub_i8:
 ; CHECK: sub z0.b, p0/m, z0.b, z1.b
@@ -83,8 +80,6 @@
   ret <vscale x 2 x i64> %out
 }

-
-
 define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: subr_i8:
 ; CHECK: subr z0.b, p0/m, z0.b, z1.b
@@ -125,7 +120,245 @@
   ret <vscale x 2 x i64> %out
 }

+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smax_i8:
+; CHECK: smax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smax_i16:
+; CHECK: smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smax_i32:
+; CHECK: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smax_i64:
+; CHECK: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umax_i8:
+; CHECK: umax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umax_i16:
+; CHECK: umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umax_i32:
+; CHECK: umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umax_i64:
+; CHECK: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smin_i8:
+; CHECK: smin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smin_i16:
+; CHECK: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smin_i32:
+; CHECK: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smin_i64:
+; CHECK: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umin_i8:
+; CHECK: umin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umin_i16:
+; CHECK: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umin_i32:
+; CHECK: umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umin_i64:
+; CHECK: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @sabd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sabd_i8:
+; CHECK: sabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sabd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sabd_i16:
+; CHECK: sabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sabd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sabd_i32:
+; CHECK: sabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sabd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sabd_i64:
+; CHECK: sabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @uabd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uabd_i8:
+; CHECK: uabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uabd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uabd_i16:
+; CHECK: uabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uabd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uabd_i32:
+; CHECK: uabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uabd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uabd_i64:
+; CHECK: uabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -141,3 +374,33 @@
 declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll
@@ -0,0 +1,91 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sdiv_i32:
+; CHECK: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sdiv_i64:
+; CHECK: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: udiv_i32:
+; CHECK: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: udiv_i64:
+; CHECK: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @sdivr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sdivr_i32:
+; CHECK: sdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdivr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sdivr_i64:
+; CHECK: sdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @udivr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: udivr_i32:
+; CHECK: udivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @udivr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: udivr_i64:
+; CHECK: udivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
diff --git a/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll
@@ -0,0 +1,140 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i8> @and_pred_i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: and_pred_i8:
+; CHECK: and z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i8> @llvm.aarch64.sve.and.nxv2i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %out
+}
+
+define <vscale x 2 x i16> @and_pred_i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: and_pred_i16:
+; CHECK: and z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i16> @llvm.aarch64.sve.and.nxv2i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b)
+  ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 2 x i32> @and_pred_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: and_pred_i32:
+; CHECK: and z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i32> @llvm.aarch64.sve.and.nxv2i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 2 x i64> @and_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: and_pred_i64:
+; CHECK: and z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i8> @or_pred_i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: or_pred_i8:
+; CHECK: orr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i8> @llvm.aarch64.sve.or.nxv2i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %out
+}
+
+define <vscale x 2 x i16> @or_pred_i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: or_pred_i16:
+; CHECK: orr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i16> @llvm.aarch64.sve.or.nxv2i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b)
+  ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 2 x i32> @or_pred_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: or_pred_i32:
+; CHECK: orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i32> @llvm.aarch64.sve.or.nxv2i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 2 x i64> @or_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: or_pred_i64:
+; CHECK: orr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.or.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i8> @xor_pred_i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: xor_pred_i8:
+; CHECK: eor z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i8> @llvm.aarch64.sve.xor.nxv2i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a, <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %out
+}
+
+define <vscale x 2 x i16> @xor_pred_i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: xor_pred_i16:
+; CHECK: eor z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i16> @llvm.aarch64.sve.xor.nxv2i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a, <vscale x 2 x i16> %b)
+  ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 2 x i32> @xor_pred_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: xor_pred_i32:
+; CHECK: eor z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i32> @llvm.aarch64.sve.xor.nxv2i32(<vscale x 2 x i1> %pg, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 2 x i64> @xor_pred_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: xor_pred_i64:
+; CHECK: eor z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.xor.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 2 x i8> @llvm.aarch64.sve.and.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.aarch64.sve.and.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.aarch64.sve.and.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 2 x i8> @llvm.aarch64.sve.or.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.aarch64.sve.or.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.aarch64.sve.or.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.or.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 2 x i8> @llvm.aarch64.sve.xor.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.aarch64.sve.xor.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.aarch64.sve.xor.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.xor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll
@@ -0,0 +1,199 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @mad_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: mad_i8:
+; CHECK: mad z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @mad_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: mad_i16:
+; CHECK: mad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @mad_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: mad_i32:
+; CHECK: mad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @mad_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: mad_i64:
+; CHECK: mad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @msb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: msb_i8:
+; CHECK: msb z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @msb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: msb_i16:
+; CHECK: msb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @msb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: msb_i32:
+; CHECK: msb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @msb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: msb_i64:
+; CHECK: msb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @mla_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: mla_i8:
+; CHECK: mla z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @mla_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: mla_i16:
+; CHECK: mla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @mla_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: mla_i32:
+; CHECK: mla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @mla_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: mla_i64:
+; CHECK: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @mls_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: mls_i8:
+; CHECK: mls z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @mls_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: mls_i16:
+; CHECK: mls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @mls_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: mls_i32:
+; CHECK: mls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @mls_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: mls_i64:
+; CHECK: mls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
@@ -0,0 +1,134 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: mul_i8:
+; CHECK: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: mul_i16:
+; CHECK: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: mul_i32:
+; CHECK: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: mul_i64:
+; CHECK: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smulh_i8:
+; CHECK: smulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smulh_i16:
+; CHECK: smulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smulh_i32:
+; CHECK: smulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smulh_i64:
+; CHECK: smulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umulh_i8:
+; CHECK: umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umulh_i16:
+; CHECK: umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umulh_i32:
+; CHECK: umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umulh_i64:
+; CHECK: umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)