diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -399,12 +399,9 @@
   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
-  unsigned DstReg = MI.getOperand(0).getReg();
+  Register DstReg = MI.getOperand(0).getReg();
   bool DstIsDead = MI.getOperand(0).isDead();
 
-  if (DType == AArch64::DestructiveBinary)
-    assert(DstReg != MI.getOperand(3).getReg());
-
   bool UseRev = false;
   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
   switch (DType) {
@@ -443,6 +440,7 @@
   // so the Destructive Operand must be unique.
   bool DOPRegIsUnique = false;
   switch (DType) {
+  case AArch64::DestructiveBinary:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
     DOPRegIsUnique =
@@ -501,6 +499,7 @@
   // Create the destructive operation (if required)
   //
   MachineInstrBuilder PRFX, DOP;
+  bool NeedDstFixup = false;
   if (FalseZero) {
 #ifndef NDEBUG
     assert(DOPRegIsUnique && "The destructive operand should be unique");
 #endif
@@ -517,22 +516,34 @@
     // After the movprfx, the destructive operand is same as Dst
     DOPIdx = 0;
   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
+    if (DType == AArch64::DestructiveBinary &&
+        DstReg == MI.getOperand(SrcIdx).getReg()) {
+      // For a non-commutable instruction with no reverse form, movprfx cannot
+      // be used when DstReg is the same as the second source operand; instead,
+      // insert a fixup mov after the operation to move the result into the
+      // destination register.
+      NeedDstFixup = true;
+    } else {
 #ifndef NDEBUG
-    assert(DOPRegIsUnique && "The destructive operand should be unique");
+      assert(DOPRegIsUnique && "The destructive operand should be unique");
 #endif
-    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
-               .addReg(DstReg, RegState::Define)
-               .addReg(MI.getOperand(DOPIdx).getReg());
-    DOPIdx = 0;
+      PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+                 .addReg(DstReg, RegState::Define)
+                 .addReg(MI.getOperand(DOPIdx).getReg());
+      DOPIdx = 0;
+    }
   }
 
   //
   // Create the destructive operation
   //
   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
-            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+            .addReg(NeedDstFixup ? MI.getOperand(DOPIdx).getReg() : DstReg,
+                    RegState::Define |
+                        getDeadRegState(DstIsDead && !NeedDstFixup));
   switch (DType) {
+  case AArch64::DestructiveBinary:
   case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
@@ -548,6 +559,12 @@
     break;
   }
 
+  if (NeedDstFixup)
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
+        .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+        .addReg(MI.getOperand(DOPIdx).getReg())
+        .addReg(MI.getOperand(DOPIdx).getReg());
+
   if (PRFX) {
     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
     transferImpOps(MI, PRFX, DOP);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -293,12 +293,13 @@
   defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
   defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", null_frag>;
 
-  defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
-  defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
-  defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
+  defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", DestructiveBinaryComm>;
+  defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
+  defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
 
-  defm ADD_ZPZZ : sve_int_bin_pred_bhsd;
-  defm SUB_ZPZZ : sve_int_bin_pred_bhsd;
+  defm ADD_ZPZZ : sve_int_bin_pred_bhsd;
+  defm SUB_ZPZZ : sve_int_bin_pred_bhsd;
+  defm SUBR_ZPZZ : sve_int_bin_pred_bhsd;
 
   let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
   defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd;
@@ -345,21 +346,23 @@
   defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
   defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
 
-  defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
-  defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
-  defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
+  defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", DestructiveBinaryComm>;
+  defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", DestructiveBinaryComm>;
+  defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", DestructiveBinaryComm>;
 
-  defm MUL_ZPZZ : sve_int_bin_pred_bhsd;
-  defm SMULH_ZPZZ : sve_int_bin_pred_bhsd;
-  defm UMULH_ZPZZ : sve_int_bin_pred_bhsd;
+  defm MUL_ZPZZ : sve_int_bin_pred_bhsd;
+  defm SMULH_ZPZZ : sve_int_bin_pred_bhsd;
+  defm UMULH_ZPZZ : sve_int_bin_pred_bhsd;
 
-  defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
-  defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
-  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ",
/*isReverseInstr*/ 1>; - defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", int_aarch64_sve_udivr, DestructiveBinaryCommWithRev, "UDIV_ZPmZ", /*isReverseInstr*/ 1>; + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">; + defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", DestructiveBinaryCommWithRev, "SDIV_ZPmZ", /*isReverseInstr*/ 1>; + defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", DestructiveBinaryCommWithRev, "UDIV_ZPmZ", /*isReverseInstr*/ 1>; - defm SDIV_ZPZZ : sve_int_bin_pred_sd; - defm UDIV_ZPZZ : sve_int_bin_pred_sd; + defm SDIV_ZPZZ : sve_int_bin_pred_sd; + defm UDIV_ZPZZ : sve_int_bin_pred_sd; + defm SDIVR_ZPZZ : sve_int_bin_pred_sd; + defm UDIVR_ZPZZ : sve_int_bin_pred_sd; defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", AArch64sdot>; defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", AArch64udot>; @@ -384,17 +387,19 @@ defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; - defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>; - defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>; - defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", DestructiveBinaryComm>; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", DestructiveBinaryComm>; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", DestructiveBinaryComm>; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", DestructiveBinaryComm>; + defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", DestructiveBinaryComm>; + defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", DestructiveBinaryComm>; - defm SMAX_ZPZZ : sve_int_bin_pred_bhsd; - defm UMAX_ZPZZ : sve_int_bin_pred_bhsd; - defm SMIN_ZPZZ : sve_int_bin_pred_bhsd; - defm UMIN_ZPZZ : sve_int_bin_pred_bhsd; + defm SMAX_ZPZZ : sve_int_bin_pred_bhsd; + defm UMAX_ZPZZ : sve_int_bin_pred_bhsd; + defm SMIN_ZPZZ : sve_int_bin_pred_bhsd; + defm UMIN_ZPZZ : sve_int_bin_pred_bhsd; + defm SABD_ZPZZ : sve_int_bin_pred_bhsd; + defm UABD_ZPZZ : sve_int_bin_pred_bhsd; defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>; @@ -408,28 +413,33 @@ defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>; defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>; - defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>; - defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; - defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", 
"FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>; - defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>; - defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>; - defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>; - defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>; - defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>; - defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>; - defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>; - defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>; - defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", /*isReverseInstr*/ 1>; - defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">; - - defm FADD_ZPZZ : sve_fp_bin_pred_hfd; - defm FSUB_ZPZZ : sve_fp_bin_pred_hfd; - defm FMUL_ZPZZ : sve_fp_bin_pred_hfd; - defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd; - defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd; - defm FMAX_ZPZZ : sve_fp_bin_pred_hfd; - defm FMIN_ZPZZ : sve_fp_bin_pred_hfd; - defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; + defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", DestructiveBinaryComm>; + defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; + defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", DestructiveBinaryComm>; + defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>; + defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", DestructiveBinaryComm>; + defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", DestructiveBinaryComm>; + defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", DestructiveBinaryComm>; + defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", DestructiveBinaryComm>; + defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", DestructiveBinaryComm>; + defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", DestructiveBinaryComm>; + defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", DestructiveBinaryCommWithRev, "FDIV_ZPmZ", /*isReverseInstr*/ 1>; + defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">; + defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale", "FSCALE_ZPZZ", DestructiveBinary>; + + defm FADD_ZPZZ : sve_fp_bin_pred_hfd; + defm FSUB_ZPZZ : sve_fp_bin_pred_hfd; + defm FMUL_ZPZZ : sve_fp_bin_pred_hfd; + defm FSUBR_ZPZZ : sve_fp_bin_pred_hfd; + defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd; + defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd; + defm FMAX_ZPZZ : sve_fp_bin_pred_hfd; + defm FMIN_ZPZZ : sve_fp_bin_pred_hfd; + defm FABD_ZPZZ : sve_fp_bin_pred_hfd; + defm FMULX_ZPZZ : sve_fp_bin_pred_hfd; + defm FDIVR_ZPZZ : sve_fp_bin_pred_hfd; + defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; + defm FSCALE_ZPZZ : sve_fp_bin_pred_hfd_fscale; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : 
sve_fp_2op_p_zds_zeroing_hsd; @@ -444,6 +454,7 @@ defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; + defm FSCALE_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; } defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; @@ -458,20 +469,23 @@ defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>; defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>; - defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", int_aarch64_sve_fmla, "FMAD_ZPmZZ">; - defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", "FMLS_ZPZZZ", int_aarch64_sve_fmls, "FMSB_ZPmZZ">; - defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", int_aarch64_sve_fnmla, "FNMAD_ZPmZZ">; - defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", int_aarch64_sve_fnmls, "FNMSB_ZPmZZ">; - - defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad, "FMLA_ZPmZZ", /*isReverseInstr*/ 1>; - defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb, "FMLS_ZPmZZ", /*isReverseInstr*/ 1>; - defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>; - defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>; - - defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx; - defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx; - defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx; - defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", "FMAD_ZPmZZ">; + defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", "FMLS_ZPZZZ", "FMSB_ZPmZZ">; + defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", "FNMAD_ZPmZZ">; + defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", "FNMSB_ZPmZZ">; + defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", "FMAD_ZPZZZ", "FMLA_ZPmZZ", /*isReverseInstr*/ 1>; + defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", "FMSB_ZPZZZ", "FMLS_ZPmZZ", /*isReverseInstr*/ 1>; + defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", "FNMAD_ZPZZZ", "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>; + defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", "FNMSB_ZPZZZ", "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>; + + defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FMAD_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FMSB_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FNMAD_ZPZZZ : sve_fp_3op_p_zds_zx; + defm FNMSB_ZPZZZ : sve_fp_3op_p_zds_zx; multiclass fma { // Zd = Za + Zn * Zm @@ -1385,14 +1399,15 @@ defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">; // Predicated shifts - defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>; - defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; - defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; + defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr", "ASR_ZPZI">; + defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr", "LSR_ZPZI">; + defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl", "LSL_ZPZI">; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI">; - defm ASR_ZPZI : sve_int_shift_pred_bhsd; - defm LSR_ZPZI : 
sve_int_shift_pred_bhsd; - defm LSL_ZPZI : sve_int_shift_pred_bhsd; + defm ASR_ZPZI : sve_int_shift_pred_bhsd; + defm LSR_ZPZI : sve_int_shift_pred_bhsd; + defm LSL_ZPZI : sve_int_shift_pred_bhsd; + defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_bhsd; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; @@ -1401,16 +1416,19 @@ defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; } - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; - defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>; - defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>; - defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>; + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", "ASRR_ZPmZ">; + defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", "LSRR_ZPmZ">; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", "LSLR_ZPmZ">; + defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", "ASR_ZPmZ", /*isReverseInstr*/ 1>; + defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", "LSR_ZPmZ", /*isReverseInstr*/ 1>; + defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", "LSL_ZPmZ", /*isReverseInstr*/ 1>; - defm ASR_ZPZZ : sve_int_bin_pred_bhsd; - defm LSR_ZPZZ : sve_int_bin_pred_bhsd; - defm LSL_ZPZZ : sve_int_bin_pred_bhsd; + defm ASR_ZPZZ : sve_int_bin_pred_bhsd; + defm LSR_ZPZZ : sve_int_bin_pred_bhsd; + defm LSL_ZPZZ : sve_int_bin_pred_bhsd; + defm ASRR_ZPZZ : sve_int_bin_pred_bhsd; + defm LSRR_ZPZZ : sve_int_bin_pred_bhsd; + defm LSLR_ZPZZ : sve_int_bin_pred_bhsd; defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>; defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>; @@ -2523,6 +2541,19 @@ defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>; defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>; + // SVE2 predicated shifts + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI">; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI">; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI">; + + defm SQSHL_ZPZI : sve_int_shift_pred_bhsd; + defm UQSHL_ZPZI : sve_int_shift_pred_bhsd; + defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_bhsd; + defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_bhsd; + defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_bhsd; + let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in { defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; @@ -2531,13 +2562,6 @@ defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; } - // SVE2 predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">; - defm UQSHL_ZPmI : 
sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; - // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>; defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1651,7 +1651,7 @@ } multiclass sve_fp_2op_p_zds opc, string asm, string Ps, - SDPatternOperator op, DestructiveInstTypeEnum flags, + DestructiveInstTypeEnum flags, string revname="", bit isReverseInstr=0> { let DestructiveInstType = flags in { def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>, @@ -1661,21 +1661,6 @@ def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; } - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; -} - -multiclass sve_fp_2op_p_zds_fscale opc, string asm, - SDPatternOperator op> { - def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>; - def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>; - def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>; - - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_fp_2op_p_zds_zeroing_hsd { @@ -1797,8 +1782,7 @@ } multiclass sve_fp_3op_p_zds_a opc, string asm, string Ps, - SDPatternOperator op, string revname, - bit isReverseInstr=0> { + string revname, bit isReverseInstr=0> { let DestructiveInstType = DestructiveTernaryCommWithRev in { def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>, SVEPseudo2Instr, SVEInstr2Rev; @@ -1807,10 +1791,6 @@ def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; } - - def : SVE_4_Op_Pat(NAME # _H)>; - def : SVE_4_Op_Pat(NAME # _S)>; - def : SVE_4_Op_Pat(NAME # _D)>; } class sve_fp_3op_p_zds_b sz, bits<2> opc, string asm, @@ -1834,28 +1814,29 @@ let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; - let DestructiveInstType = DestructiveOther; let ElementSize = zprty.ElementSize; } -multiclass sve_fp_3op_p_zds_b opc, string asm, SDPatternOperator op, - string revname, bit isReverseInstr> { +multiclass sve_fp_3op_p_zds_b opc, string asm, string Ps, + string revname, bit isReverseInstr=0> { + let DestructiveInstType = DestructiveTernaryCommWithRev in { def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>, - SVEInstr2Rev; + SVEPseudo2Instr, SVEInstr2Rev; def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>, - SVEInstr2Rev; + SVEPseudo2Instr, SVEInstr2Rev; def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>, - SVEInstr2Rev; - - def : SVE_4_Op_Pat(NAME # _H)>; - def : SVE_4_Op_Pat(NAME # _S)>; - def : SVE_4_Op_Pat(NAME # _D)>; + SVEPseudo2Instr, SVEInstr2Rev; + } } -multiclass sve_fp_3op_p_zds_zx { +multiclass sve_fp_3op_p_zds_zx { def _UNDEF_H : PredThreeOpPseudo; def _UNDEF_S : PredThreeOpPseudo; def _UNDEF_D : PredThreeOpPseudo; + + def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; } //===----------------------------------------------------------------------===// @@ -2493,7 +2474,6 @@ } 
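For illustration only: with the ISel patterns moved onto the _UNDEF pseudos, the final instruction is chosen by the pseudo expansion pass based on the register assignment. A sketch of the expected expansions for a reversible operation such as FSUB, using the same register assignment as the fsub_half_rev and fsub_half_movprfx tests added below:

  // Destination aliases the second source: select the reverse form (FSUBR)
  // instead of inserting a copy.
  fsubr   z0.h, p0/m, z0.h, z1.h

  // Destination differs from the (unique) destructive operand: prefix the
  // operation with a movprfx, which is bundled with it.
  movprfx z0, z1
  fsub    z0.h, p0/m, z0.h, z1.h
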
multiclass sve_int_bin_pred_arit_0 opc, string asm, string Ps, - SDPatternOperator op, DestructiveInstTypeEnum flags, string revname="", bit isReverseInstr=0> { let DestructiveInstType = flags in { @@ -2506,15 +2486,9 @@ def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; } - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_bin_pred_arit_1 opc, string asm, string Ps, - SDPatternOperator op, DestructiveInstTypeEnum flags> { let DestructiveInstType = flags in { def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>, @@ -2526,15 +2500,9 @@ def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>, SVEPseudo2Instr; } - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_bin_pred_arit_2 opc, string asm, string Ps, - SDPatternOperator op, DestructiveInstTypeEnum flags> { let DestructiveInstType = flags in { def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>, @@ -2546,16 +2514,10 @@ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, SVEPseudo2Instr; } - - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; } // Special case for divides which are not defined for 8b/16b elements. multiclass sve_int_bin_pred_arit_2_div opc, string asm, string Ps, - SDPatternOperator op, DestructiveInstTypeEnum flags, string revname="", bit isReverseInstr=0> { let DestructiveInstType = flags in { @@ -2564,9 +2526,6 @@ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; } - - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -4957,8 +4916,7 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_int_bin_pred_shift_imm_left opc, string asm, string Ps, - SDPatternOperator op = null_frag> { +multiclass sve_int_bin_pred_shift_imm_left opc, string asm, string Ps> { def _B : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; def _H : SVEPseudo2Instr, @@ -4974,21 +4932,18 @@ let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; } -// As above but shift amount takes the form of a "vector immediate". 
-multiclass sve_int_bin_pred_shift_imm_left_dup opc, string asm, - string Ps, SDPatternOperator op> -: sve_int_bin_pred_shift_imm_left { - def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _D)>; +multiclass sve_int_bin_pred_shift_imm_left_bhsd { + def _UNDEF_B : PredTwoOpImmPseudo; + def _UNDEF_H : PredTwoOpImmPseudo; + def _UNDEF_S : PredTwoOpImmPseudo; + def _UNDEF_D : PredTwoOpImmPseudo; + + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_B)>; + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_D)>; } multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd { @@ -5003,8 +4958,7 @@ def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _ZERO_D)>; } -multiclass sve_int_bin_pred_shift_imm_right opc, string asm, string Ps, - SDPatternOperator op = null_frag> { +multiclass sve_int_bin_pred_shift_imm_right opc, string asm, string Ps> { def _B : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; def _H : SVEPseudo2Instr, @@ -5020,21 +4974,18 @@ let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } - - def : SVE_3_Op_Imm_Pat(NAME # _B)>; - def : SVE_3_Op_Imm_Pat(NAME # _H)>; - def : SVE_3_Op_Imm_Pat(NAME # _S)>; - def : SVE_3_Op_Imm_Pat(NAME # _D)>; } -// As above but shift amount takes the form of a "vector immediate". -multiclass sve_int_bin_pred_shift_imm_right_dup opc, string asm, - string Ps, SDPatternOperator op> -: sve_int_bin_pred_shift_imm_right { - def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_Pred_Pat(NAME # _D)>; +multiclass sve_int_bin_pred_shift_imm_right_bhsd { + def _UNDEF_B : PredTwoOpImmPseudo; + def _UNDEF_H : PredTwoOpImmPseudo; + def _UNDEF_S : PredTwoOpImmPseudo; + def _UNDEF_D : PredTwoOpImmPseudo; + + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_B)>; + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Imm_Pat(NAME # _UNDEF_D)>; } multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd { @@ -5074,7 +5025,7 @@ } multiclass sve_int_bin_pred_shift opc, string asm, string Ps, - SDPatternOperator op, string revname, bit isReverseInstr = 0> { + string revname, bit isReverseInstr = 0> { let DestructiveInstType = DestructiveBinaryCommWithRev in { def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>, SVEPseudo2Instr, SVEInstr2Rev; @@ -5085,10 +5036,6 @@ def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; } - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_bin_pred_zeroing_bhsd { @@ -8037,7 +7984,7 @@ def am_sve_regreg_lsl3 : ComplexPattern", []>; // Predicated pseudo floating point two operand instructions. 
-multiclass sve_fp_bin_pred_hfd { +multiclass sve_fp_bin_pred_hfd { def _UNDEF_H : PredTwoOpPseudo; def _UNDEF_S : PredTwoOpPseudo; def _UNDEF_D : PredTwoOpPseudo; @@ -8048,10 +7995,28 @@ def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; + + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; +} + +// Predicated pseudo floating point two operand instructions, special case for fscale +multiclass sve_fp_bin_pred_hfd_fscale { + def _UNDEF_H : PredTwoOpPseudo; + def _UNDEF_S : PredTwoOpPseudo; + def _UNDEF_D : PredTwoOpPseudo; + + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; } // Predicated pseudo integer two operand instructions. -multiclass sve_int_bin_pred_bhsd { +multiclass sve_int_bin_pred_bhsd { def _UNDEF_B : PredTwoOpPseudo; def _UNDEF_H : PredTwoOpPseudo; def _UNDEF_S : PredTwoOpPseudo; @@ -8061,20 +8026,28 @@ def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; + + def : SVE_3_Op_Pat(NAME # _UNDEF_B)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; } // As sve_int_bin_pred but when only i32 and i64 vector types are required. -multiclass sve_int_bin_pred_sd { +multiclass sve_int_bin_pred_sd { def _UNDEF_S : PredTwoOpPseudo; def _UNDEF_D : PredTwoOpPseudo; def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; + + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; } // Predicated pseudo integer two operand instructions. Second operand is an // immediate specified by imm_[bhsd]. 
-multiclass sve_int_shift_pred_bhsd { def _UNDEF_B : PredTwoOpImmPseudo, FalseLanesUndef>; @@ -8086,4 +8059,9 @@ def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_H)>; def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_S)>; def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_D)>; + + def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_B)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_H)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_S)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_D)>; } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-movprfx.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-movprfx.ll @@ -0,0 +1,1626 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s + +; +; FADD +; + +define @fadd_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fadd_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fadd_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fadd_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fadd_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fadd_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fadd_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fadd_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fadd_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fadd_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fadd_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fadd_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FSUB +; + +define @fsub_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fsub_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fsub_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fsub_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fsub_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fsub_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fsub_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fsub_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fsub_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fsub_double_rev: +; CHECK: // 
%bb.0: +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fsub_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fsub_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FSUBR +; + +define @fsubr_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fsubr_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsubr.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fsubr_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fsubr_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsubr.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fsubr_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fsubr_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsubr.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fsubr_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fsubr_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsubr.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fsubr_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fsubr_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsubr.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fsubr_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fsubr_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsubr.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FMUL +; + +define @fmul_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmul_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fmul_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmul_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fmul_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmul_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fmul_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmul_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fmul_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmul_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fmul_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmul_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FMULX +; 
+ +define @fmulx_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmulx_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmulx z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fmulx_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmulx_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmulx z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fmulx_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmulx_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmulx z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fmulx_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmulx_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmulx z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fmulx_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmulx_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmulx z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fmulx_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmulx_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmulx z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FDIV +; + +define @fdiv_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fdiv_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fdiv_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fdiv_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fdiv_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fdiv_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fdiv_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fdiv_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FDIVR +; + +define @fdivr_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fdivr_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdivr.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fdivr_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fdivr_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdivr.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fdivr_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fdivr_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdivr.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fdivr_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fdivr_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d 
+; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdivr.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FMIN +; + +define @fmin_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmin_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fmin_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmin_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fmin_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmin_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fmin_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmin_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fmin_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmin_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fmin_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmin_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FMAX +; + +define @fmax_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmax_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fmax_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmax_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fmax_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmax_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fmax_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmax_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fmax_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmax_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fmax_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmax_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FABD +; + +define @fabd_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fabd_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fabd_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fabd_half_movprfx: +; CHECK: // %bb.0: +; 
CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fabd_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fabd_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fabd_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fabd_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fabd_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fabd_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fabd_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fabd_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FMINNM +; + +define @fminnm_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fminnm_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fminnm_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fminnm_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fminnm_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fminnm_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fminnm_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fminnm_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fminnm_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fminnm_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fminnm_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fminnm_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FMAXNM +; + +define @fmaxnm_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmaxnm_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fmaxnm_half_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmaxnm_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %b, + %b) + ret %out +} + +define @fmaxnm_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmaxnm_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %b, + 
%a) + ret %out +} + +define @fmaxnm_float_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmaxnm_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %b, + %b) + ret %out +} + +define @fmaxnm_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fmaxnm_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fmaxnm_double_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: fmaxnm_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %b, + %b) + ret %out +} + +; +; FSCALE +; + +define @fscale_half_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fscale_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fscale_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fscale_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fscale z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv8f16( %pg, + %b, + %c) + ret %out +} + +define @fscale_float_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fscale_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fscale_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fscale_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv4f32( %pg, + %b, + %c) + ret %out +} + +define @fscale_double_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: fscale_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv2f64( %pg, + %b, + %a) + ret %out +} + +define @fscale_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fscale_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fscale z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fscale.nxv2f64( %pg, + %b, + %c) + ret %out +} + +; +; FMLA +; + +define @fmla_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmla_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmla_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmla_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmla_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmla_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmla_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmla_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.fmla.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmla_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmla_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmla_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmla_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FMAD +; + +define @fmad_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmad_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmad_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmad_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmad_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmad_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmad_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmad_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmad_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmad_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmad_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmad_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmad.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FMLS +; + +define @fmls_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmls_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmsb z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmls_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmls_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmls_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmls_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmsb z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmls_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmls_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmls_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmls_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmsb z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define 
@fmls_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmls_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FMSB +; + +define @fmsb_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmsb_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmls z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmsb_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmsb_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmsb_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmsb_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmls z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmsb_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmsb_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fmsb_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmsb_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fmls z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fmsb_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fmsb_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmsb.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FNMLA +; + +define @fnmla_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmla_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmad z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmla_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmla_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmla_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmla_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmad z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmla_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmla_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmla_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmla_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmad z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmla_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmla_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FNMAD +; + +define 
@fnmad_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmad_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmla z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmad_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmad_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmad_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmad_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmla z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmad_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmad_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmad_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmad_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmla z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmad_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmad_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmad.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FNMLS +; + +define @fnmls_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmls_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmsb z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmls_half_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmls_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmls_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmls_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmsb z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmls_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmls_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmls_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmls_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmsb z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmls_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmls_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +; +; FNMSB +; + +define @fnmsb_half_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmsb_half_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmls z0.h, p0/m, z2.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv8f16( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmsb_half_movprfx( %pg, %a, %b, %c) #0 { 
+; CHECK-LABEL: fnmsb_half_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv8f16( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmsb_float_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmsb_float_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmls z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv4f32( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmsb_float_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmsb_float_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv4f32( %pg, + %b, + %b, + %c) + ret %out +} + +define @fnmsb_double_rev( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmsb_double_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmls z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv2f64( %pg, + %b, + %a, + %c) + ret %out +} + +define @fnmsb_double_movprfx( %pg, %a, %b, %c) #0 { +; CHECK-LABEL: fnmsb_double_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fnmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmsb.nxv2f64( %pg, + %b, + %b, + %c) + ret %out +} + +declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) +declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) +declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) +declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) +declare @llvm.aarch64.sve.fsub.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fsubr.nxv8f16(, , ) +declare @llvm.aarch64.sve.fsubr.nxv4f32(, , ) +declare @llvm.aarch64.sve.fsubr.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmul.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmulx.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fdiv.nxv8f16(, , ) +declare @llvm.aarch64.sve.fdiv.nxv4f32(, , ) +declare @llvm.aarch64.sve.fdiv.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fdivr.nxv8f16(, , ) +declare @llvm.aarch64.sve.fdivr.nxv4f32(, , ) +declare @llvm.aarch64.sve.fdivr.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fabd.nxv8f16(, , ) +declare @llvm.aarch64.sve.fabd.nxv4f32(, , ) +declare @llvm.aarch64.sve.fabd.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fscale.nxv8f16(, , ) +declare @llvm.aarch64.sve.fscale.nxv4f32(, , ) +declare @llvm.aarch64.sve.fscale.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) 
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-movprfx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-movprfx.ll
@@ -0,0 +1,1428 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+;
+; ADD
+;
+
+define <vscale x 16 x i8> @add_i8_rev(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: add_i8_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @add_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: add_i8_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    add z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @add_i16_rev(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: add_i16_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b,
+                                                               <vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @add_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: add_i16_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    add z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @add_i32_rev(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: add_i32_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b,
+                                                               <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @add_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: add_i32_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @add_i64_rev(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: add_i64_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @add_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: add_i64_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SUB
+;
+
+define <vscale x 16 x i8> @sub_i8_rev(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sub_i8_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @sub_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sub_i8_movprfx:
+; CHECK:       //
%bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @sub_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sub_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @sub_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sub_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @sub_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sub_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sub_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sub_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @sub_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sub_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @sub_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sub_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sub.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SUBR +; + +define @subr_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @subr_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: subr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @subr_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @subr_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: subr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @subr_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @subr_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: subr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @subr_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @subr_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: subr_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: 
subr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.subr.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; MUL +; + +define @mul_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @mul_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @mul_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @mul_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @mul_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @mul_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @mul_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @mul_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: mul_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SMULH +; + +define @smulh_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @smulh_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @smulh_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @smulh_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @smulh_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @smulh_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smulh z0.s, p0/m, z0.s, 
z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @smulh_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @smulh_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smulh_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smulh.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; UMULH +; + +define @umulh_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @umulh_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @umulh_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @umulh_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @umulh_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @umulh_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @umulh_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @umulh_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umulh_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umulh.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SDIV +; + +define @sdiv_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sdiv_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sdiv_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sdiv_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @sdiv_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sdiv_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @sdiv_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sdiv_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; 
CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdiv.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SDIVR +; + +define @sdivr_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sdivr_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdivr.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sdivr_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sdivr_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdivr.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @sdivr_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sdivr_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdivr.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @sdivr_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sdivr_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sdivr.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; UDIV +; + +define @udiv_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: udiv_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @udiv_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: udiv_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @udiv_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: udiv_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: udivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @udiv_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: udiv_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udiv.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; UDIVR +; + +define @udivr_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: udivr_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udivr.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @udivr_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: udivr_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udivr.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @udivr_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: udivr_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udivr.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @udivr_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: udivr_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: udivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.udivr.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SMIN +; + +define @smin_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @smin_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i8_movprfx: 
+; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @smin_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @smin_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @smin_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @smin_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @smin_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @smin_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smin_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smin.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SMAX +; + +define @smax_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @smax_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @smax_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @smax_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @smax_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @smax_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @smax_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @smax_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: smax_i64_movprfx: +; CHECK: // %bb.0: +; 
CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smax.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; SABD +; + +define @sabd_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @sabd_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @sabd_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @sabd_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @sabd_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sabd_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @sabd_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: sabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @sabd_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sabd_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabd.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; UMIN +; + +define @umin_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @umin_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @umin_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @umin_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @umin_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @umin_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: 
movprfx z0, z1 +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @umin_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @umin_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umin_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umin.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; UMAX +; + +define @umax_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @umax_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @umax_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @umax_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @umax_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @umax_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @umax_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @umax_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: umax_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umax.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; UABD +; + +define @uabd_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @uabd_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @uabd_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @uabd_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 
+; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @uabd_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @uabd_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @uabd_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @uabd_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uabd_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabd.nxv2i64( %pg, + %b, + %b) + ret %out +} + +declare @llvm.aarch64.sve.add.nxv16i8(, , ) +declare @llvm.aarch64.sve.add.nxv8i16(, , ) +declare @llvm.aarch64.sve.add.nxv4i32(, , ) +declare @llvm.aarch64.sve.add.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sub.nxv16i8(, , ) +declare @llvm.aarch64.sve.sub.nxv8i16(, , ) +declare @llvm.aarch64.sve.sub.nxv4i32(, , ) +declare @llvm.aarch64.sve.sub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.subr.nxv16i8(, , ) +declare @llvm.aarch64.sve.subr.nxv8i16(, , ) +declare @llvm.aarch64.sve.subr.nxv4i32(, , ) +declare @llvm.aarch64.sve.subr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.mul.nxv16i8(, , ) +declare @llvm.aarch64.sve.mul.nxv8i16(, , ) +declare @llvm.aarch64.sve.mul.nxv4i32(, , ) +declare @llvm.aarch64.sve.mul.nxv2i64(, , ) + +declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) +declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) +declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) +declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) +declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) +declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) +declare @llvm.aarch64.sve.umulh.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sdiv.nxv4i32(, , ) +declare @llvm.aarch64.sve.sdiv.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sdivr.nxv4i32(, , ) +declare @llvm.aarch64.sve.sdivr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.udiv.nxv4i32(, , ) +declare @llvm.aarch64.sve.udiv.nxv2i64(, , ) + +declare @llvm.aarch64.sve.udivr.nxv4i32(, , ) +declare @llvm.aarch64.sve.udivr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.smin.nxv16i8(, , ) +declare @llvm.aarch64.sve.smin.nxv8i16(, , ) +declare @llvm.aarch64.sve.smin.nxv4i32(, , ) +declare @llvm.aarch64.sve.smin.nxv2i64(, , ) + +declare @llvm.aarch64.sve.smax.nxv16i8(, , ) +declare @llvm.aarch64.sve.smax.nxv8i16(, , ) +declare @llvm.aarch64.sve.smax.nxv4i32(, , ) +declare @llvm.aarch64.sve.smax.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) +declare @llvm.aarch64.sve.sabd.nxv8i16(, , ) +declare @llvm.aarch64.sve.sabd.nxv4i32(, , ) +declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umin.nxv16i8(, , ) +declare @llvm.aarch64.sve.umin.nxv8i16(, , ) +declare @llvm.aarch64.sve.umin.nxv4i32(, , ) +declare @llvm.aarch64.sve.umin.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umax.nxv16i8(, , ) +declare @llvm.aarch64.sve.umax.nxv8i16(, , ) +declare @llvm.aarch64.sve.umax.nxv4i32(, , ) +declare 
<vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-movprfx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-movprfx.ll
@@ -0,0 +1,829 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define <vscale x 16 x i8> @asr_i8_rev(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: asr_i8_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @asr_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: asr_i8_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_i16_rev(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: asr_i16_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b,
+                                                               <vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @asr_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: asr_i16_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_i32_rev(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: asr_i32_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b,
+                                                               <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @asr_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: asr_i32_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asr_i64_rev(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: asr_i64_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @asr_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: asr_i64_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+;
+; LSR
+;
+
+define <vscale x 16 x i8> @lsr_i8_rev(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: lsr_i8_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsrr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @lsr_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: lsr_i8_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16_rev(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: lsr_i16_rev:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b,
+                                                               <vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: lsr_i16_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @lsr_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @lsr_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @lsr_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @lsr_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; LSL +; + +define @lsl_i8_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i8_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @lsl_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %b, + %b) + ret %out +} + +define @lsl_i16_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i16_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @lsl_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %b, + %b) + ret %out +} + +define @lsl_i32_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i32_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @lsl_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %b, + %b) + ret %out +} + +define @lsl_i64_rev( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i64_rev: +; CHECK: // %bb.0: +; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @lsl_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %b, + %b) + ret %out +} + +; +; ASR_I +; + +define @asr_i_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asr_i_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 2) + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %b, + %dup) + ret %out +} + +define @asr_i_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asr_i_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; 
CHECK-NEXT: asr z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %b, + %dup) + ret %out +} + +define @asr_i_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asr_i_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %b, + %dup) + ret %out +} + +define @asr_i_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asr_i_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 2) + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %b, + %dup) + ret %out +} + +; +; LSR_I +; + +define @lsr_i_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 2) + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %b, + %dup) + ret %out +} + +define @lsr_i_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %b, + %dup) + ret %out +} + +define @lsr_i_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %b, + %dup) + ret %out +} + +define @lsr_i_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsr_i_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 2) + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %b, + %dup) + ret %out +} + +; +; LSL_I +; + +define @lsl_i_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 2) + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %b, + %dup) + ret %out +} + +define @lsl_i_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %b, + %dup) + ret %out +} + +define @lsl_i_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %b, + %dup) + ret %out +} + +define @lsl_i_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: lsl_i_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 2) + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %b, + %dup) + ret %out +} + +; +; ASRD_I +; + +define @asrd_i_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asrd_i_i8_movprfx: +; 
CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv16i8( %pg, + %b, + i32 2) + ret %out +} + +define @asrd_i_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asrd_i_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv8i16( %pg, + %b, + i32 2) + ret %out +} + +define @asrd_i_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asrd_i_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv4i32( %pg, + %b, + i32 2) + ret %out +} + +define @asrd_i_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: asrd_i_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv2i64( %pg, + %b, + i32 2) + ret %out +} + +; +; SQSHL_I +; + +define @sqshl_i_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sqshl_i_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqshl z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 2) + %out = call @llvm.aarch64.sve.sqshl.nxv16i8( %pg, + %b, + %dup) + ret %out +} + +define @sqshl_i_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sqshl_i_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqshl z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) + %out = call @llvm.aarch64.sve.sqshl.nxv8i16( %pg, + %b, + %dup) + ret %out +} + +define @sqshl_i_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sqshl_i_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqshl z0.s, p0/m, z0.s, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) + %out = call @llvm.aarch64.sve.sqshl.nxv4i32( %pg, + %b, + %dup) + ret %out +} + +define @sqshl_i_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: sqshl_i_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqshl z0.d, p0/m, z0.d, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 2) + %out = call @llvm.aarch64.sve.sqshl.nxv2i64( %pg, + %b, + %dup) + ret %out +} + +; +; UQSHL_I +; + +define @uqshl_i_i8_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uqshl_i_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uqshl z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 2) + %out = call @llvm.aarch64.sve.uqshl.nxv16i8( %pg, + %b, + %dup) + ret %out +} + +define @uqshl_i_i16_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uqshl_i_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uqshl z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) + %out = call @llvm.aarch64.sve.uqshl.nxv8i16( %pg, + %b, + %dup) + ret %out +} + +define @uqshl_i_i32_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uqshl_i_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uqshl z0.s, p0/m, z0.s, #2 +; CHECK-NEXT: ret + %dup = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) + %out = call @llvm.aarch64.sve.uqshl.nxv4i32( %pg, + %b, + %dup) + ret %out +} + +define @uqshl_i_i64_movprfx( %pg, %a, %b) #0 { +; CHECK-LABEL: uqshl_i_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uqshl z0.d, p0/m, z0.d, #2 +; CHECK-NEXT: 
ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %b,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SRSHR_I
+;
+
+define <vscale x 16 x i8> @srshr_i_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: srshr_i_i8_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    srshr z0.b, p0/m, z0.b, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %b,
+                                                                 i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srshr_i_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: srshr_i_i16_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    srshr z0.h, p0/m, z0.h, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %b,
+                                                                 i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshr_i_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: srshr_i_i32_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    srshr z0.s, p0/m, z0.s, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %b,
+                                                                 i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshr_i_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: srshr_i_i64_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    srshr z0.d, p0/m, z0.d, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %b,
+                                                                 i32 2)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; URSHR_I
+;
+
+define <vscale x 16 x i8> @urshr_i_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: urshr_i_i8_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    urshr z0.b, p0/m, z0.b, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %b,
+                                                                 i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshr_i_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: urshr_i_i16_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    urshr z0.h, p0/m, z0.h, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %b,
+                                                                 i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshr_i_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urshr_i_i32_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    urshr z0.s, p0/m, z0.s, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %b,
+                                                                 i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urshr_i_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: urshr_i_i64_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    urshr z0.d, p0/m, z0.d, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %b,
+                                                                 i32 2)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSHLU_I
+;
+
+define <vscale x 16 x i8> @sqshlu_i_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqshlu_i_i8_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqshlu z0.b, p0/m, z0.b, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                  <vscale x 16 x i8> %b,
+                                                                  i32 2)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshlu_i_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqshlu_i_i16_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqshlu z0.h, p0/m, z0.h, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %b,
+                                                                  i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshlu_i_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqshlu_i_i32_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqshlu z0.s, p0/m, z0.s, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %b,
+                                                                  i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshlu_i_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sqshlu_i_i64_movprfx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqshlu z0.d, p0/m, z0.d, #2
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %b,
+                                                                  i32 2)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+attributes #0 = { "target-features"="+sve2" }