diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -73,10 +73,10 @@ SBC, // adc, sbc instructions // Arithmetic instructions - ADD_MERGE_OP1, - FADD_MERGE_OP1, - SDIV_MERGE_OP1, - UDIV_MERGE_OP1, + ADD_PRED, + FADD_PRED, + SDIV_PRED, + UDIV_PRED, SMIN_MERGE_OP1, UMIN_MERGE_OP1, SMAX_MERGE_OP1, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1356,9 +1356,9 @@ MAKE_CASE(AArch64ISD::CSINC) MAKE_CASE(AArch64ISD::THREAD_POINTER) MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) - MAKE_CASE(AArch64ISD::ADD_MERGE_OP1) - MAKE_CASE(AArch64ISD::SDIV_MERGE_OP1) - MAKE_CASE(AArch64ISD::UDIV_MERGE_OP1) + MAKE_CASE(AArch64ISD::ADD_PRED) + MAKE_CASE(AArch64ISD::SDIV_PRED) + MAKE_CASE(AArch64ISD::UDIV_PRED) MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1) MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1) MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1) @@ -1450,7 +1450,7 @@ MAKE_CASE(AArch64ISD::REV) MAKE_CASE(AArch64ISD::REINTERPRET_CAST) MAKE_CASE(AArch64ISD::TBL) - MAKE_CASE(AArch64ISD::FADD_MERGE_OP1) + MAKE_CASE(AArch64ISD::FADD_PRED) MAKE_CASE(AArch64ISD::FADDA_PRED) MAKE_CASE(AArch64ISD::FADDV_PRED) MAKE_CASE(AArch64ISD::FMAXV_PRED) @@ -3424,7 +3424,7 @@ return LowerXALUO(Op, DAG); case ISD::FADD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED); return LowerF128Call(Op, DAG, RTLIB::ADD_F128); case ISD::FSUB: return LowerF128Call(Op, DAG, RTLIB::SUB_F128); @@ -3458,9 +3458,9 @@ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::SDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED); case ISD::UDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED); case ISD::SMIN: return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1); case ISD::UMIN: @@ -3530,7 +3530,7 @@ llvm_unreachable("Unexpected request to lower ISD::LOAD"); case ISD::ADD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); llvm_unreachable("Unexpected request to lower ISD::ADD"); } } @@ -11759,12 +11759,6 @@ N->getOperand(1)); case Intrinsic::aarch64_sve_ext: return LowerSVEIntrinsicEXT(N, DAG); - case Intrinsic::aarch64_sve_sdiv: - return DAG.getNode(AArch64ISD::SDIV_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_udiv: - return DAG.getNode(AArch64ISD::UDIV_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_smin: return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -167,11 +167,13 @@ SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> ]>; +// Predicated operations with the result of inactive lanes being unspecified. +def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; +def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; +def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; +def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; + // Merging op1 into the inactive lanes. -def AArch64add_m1 : SDNode<"AArch64ISD::ADD_MERGE_OP1", SDT_AArch64Arith>; -def AArch64fadd_m1 : SDNode<"AArch64ISD::FADD_MERGE_OP1", SDT_AArch64Arith>; -def AArch64sdiv_m1 : SDNode<"AArch64ISD::SDIV_MERGE_OP1", SDT_AArch64Arith>; -def AArch64udiv_m1 : SDNode<"AArch64ISD::UDIV_MERGE_OP1", SDT_AArch64Arith>; def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>; def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>; def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>; @@ -222,7 +224,9 @@ defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ", 1>; defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", 0>; - defm ADD_ZPZZ : sve_int_bin_pred_zx; + defm ADD_ZPZZ : sve_int_bin_pred_bhsd; + + defm ADD_ZPZZ : sve_int_bin_pred_zx; defm SUB_ZPZZ : sve_int_bin_pred_zx; defm SUBR_ZPZZ : sve_int_bin_pred_zx; @@ -279,10 +283,13 @@ def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2), (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>; - defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_m1>; - defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_m1>; - defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>; - defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>; + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ", 1>; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ", 1>; + defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", 0>; + defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", int_aarch64_sve_udivr, DestructiveBinaryCommWithRev, "UDIV_ZPmZ", 0>; + + defm SDIV_ZPZZ : sve_int_bin_pred_sd; + defm UDIV_ZPZZ : sve_int_bin_pred_sd; defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>; defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>; @@ -345,7 +352,9 @@ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>; defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>; - defm FADD_ZPZZ : sve_fp_2op_p_zds_zx; + defm FADD_ZPZZ : sve_fp_bin_pred_hfd; + + defm FADD_ZPZZ : sve_fp_2op_p_zds_zx; defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx; defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx; defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1596,23 +1596,14 @@ def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_fp_2op_p_zds_zx { - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - +multiclass sve_fp_2op_p_zds_zx { def _ZERO_H : PredTwoOpPseudo; def _ZERO_S : PredTwoOpPseudo; def _ZERO_D : PredTwoOpPseudo; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; - - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; } class sve_fp_ftmad sz, string asm, ZPRRegOp zprty> @@ -2404,9 +2395,16 @@ } // Special case for divides which are not defined for 8b/16b elements. -multiclass sve_int_bin_pred_arit_2_div opc, string asm, SDPatternOperator op> { - def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>; - def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>; +multiclass sve_int_bin_pred_arit_2_div opc, string asm, string Ps, + SDPatternOperator op, + DestructiveInstTypeEnum flags, + string revname="", bit isOrig=0> { + let DestructiveInstType = flags in { + def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>, + SVEPseudo2Instr, SVEInstr2Rev; + def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, + SVEPseudo2Instr, SVEInstr2Rev; + } def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; @@ -4865,27 +4863,16 @@ def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_int_bin_pred_zx { - def _UNDEF_B : PredTwoOpPseudo; - def _UNDEF_H : PredTwoOpPseudo; - def _UNDEF_S : PredTwoOpPseudo; - def _UNDEF_D : PredTwoOpPseudo; - +multiclass sve_int_bin_pred_zx { def _ZERO_B : PredTwoOpPseudo; def _ZERO_H : PredTwoOpPseudo; def _ZERO_S : PredTwoOpPseudo; def _ZERO_D : PredTwoOpPseudo; - def : SVE_3_Op_Pat(NAME # _UNDEF_B)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; - - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_B)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; - def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_B)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_H)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>; + def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>; } multiclass sve_int_bin_pred_shift_wide opc, string asm, @@ -7810,3 +7797,36 @@ def am_sve_regreg_lsl1 : ComplexPattern", []>; def am_sve_regreg_lsl2 : ComplexPattern", []>; def am_sve_regreg_lsl3 : ComplexPattern", []>; + +// Predicated pseudo floating point two operand instructions. +multiclass sve_fp_bin_pred_hfd { + def _UNDEF_H : PredTwoOpPseudo; + def _UNDEF_S : PredTwoOpPseudo; + def _UNDEF_D : PredTwoOpPseudo; + + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; +} + +// Predicated pseudo integer two operand instructions. +multiclass sve_int_bin_pred_bhsd { + def _UNDEF_B : PredTwoOpPseudo; + def _UNDEF_H : PredTwoOpPseudo; + def _UNDEF_S : PredTwoOpPseudo; + def _UNDEF_D : PredTwoOpPseudo; + + def : SVE_3_Op_Pat(NAME # _UNDEF_B)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; +} + +// As sve_int_bin_pred but when only i32 and i64 vector types are required. +multiclass sve_int_bin_pred_sd { + def _UNDEF_S : PredTwoOpPseudo; + def _UNDEF_D : PredTwoOpPseudo; + + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; +} diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -67,7 +67,7 @@ ; CHECK-LABEL: srem_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: sub z0.s, z0.s, z2.s @@ -80,7 +80,7 @@ ; CHECK-LABEL: srem_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: sub z0.d, z0.d, z2.d @@ -156,7 +156,7 @@ ; CHECK-LABEL: urem_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: sub z0.s, z0.s, z2.s @@ -169,7 +169,7 @@ ; CHECK-LABEL: urem_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: sub z0.d, z0.d, z2.d