diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -73,10 +73,10 @@
   SBC, // adc, sbc instructions
 
   // Arithmetic instructions
-  ADD_MERGE_OP1,
-  FADD_MERGE_OP1,
-  SDIV_MERGE_OP1,
-  UDIV_MERGE_OP1,
+  ADD_PRED,
+  FADD_PRED,
+  SDIV_PRED,
+  UDIV_PRED,
   SMIN_MERGE_OP1,
   UMIN_MERGE_OP1,
   SMAX_MERGE_OP1,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1356,9 +1356,9 @@
     MAKE_CASE(AArch64ISD::CSINC)
     MAKE_CASE(AArch64ISD::THREAD_POINTER)
     MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
-    MAKE_CASE(AArch64ISD::ADD_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SDIV_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::UDIV_MERGE_OP1)
+    MAKE_CASE(AArch64ISD::ADD_PRED)
+    MAKE_CASE(AArch64ISD::SDIV_PRED)
+    MAKE_CASE(AArch64ISD::UDIV_PRED)
     MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
     MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
     MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
@@ -1450,7 +1450,7 @@
     MAKE_CASE(AArch64ISD::REV)
     MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
     MAKE_CASE(AArch64ISD::TBL)
-    MAKE_CASE(AArch64ISD::FADD_MERGE_OP1)
+    MAKE_CASE(AArch64ISD::FADD_PRED)
     MAKE_CASE(AArch64ISD::FADDA_PRED)
     MAKE_CASE(AArch64ISD::FADDV_PRED)
     MAKE_CASE(AArch64ISD::FMAXV_PRED)
@@ -3424,7 +3424,7 @@
     return LowerXALUO(Op, DAG);
   case ISD::FADD:
     if (useSVEForFixedLengthVectorVT(Op.getValueType()))
-      return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_MERGE_OP1);
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
     return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
   case ISD::FSUB:
     return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
@@ -3458,9 +3458,9 @@
   case ISD::EXTRACT_SUBVECTOR:
     return LowerEXTRACT_SUBVECTOR(Op, DAG);
   case ISD::SDIV:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
   case ISD::UDIV:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
   case ISD::SMIN:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
   case ISD::UMIN:
@@ -3530,7 +3530,7 @@
     llvm_unreachable("Unexpected request to lower ISD::LOAD");
   case ISD::ADD:
     if (useSVEForFixedLengthVectorVT(Op.getValueType()))
-      return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_MERGE_OP1);
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
     llvm_unreachable("Unexpected request to lower ISD::ADD");
   }
 }
@@ -11759,12 +11759,6 @@
                        N->getOperand(1));
   case Intrinsic::aarch64_sve_ext:
     return LowerSVEIntrinsicEXT(N, DAG);
-  case Intrinsic::aarch64_sve_sdiv:
-    return DAG.getNode(AArch64ISD::SDIV_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_udiv:
-    return DAG.getNode(AArch64ISD::UDIV_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_smin:
     return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -167,11 +167,13 @@
   SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
 ]>;
 
+// Predicated operations with the result of inactive lanes being unspecified.
+def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
+def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
+def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
+def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
+
 // Merging op1 into the inactive lanes.
-def AArch64add_m1 : SDNode<"AArch64ISD::ADD_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64fadd_m1 : SDNode<"AArch64ISD::FADD_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64sdiv_m1 : SDNode<"AArch64ISD::SDIV_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64udiv_m1 : SDNode<"AArch64ISD::UDIV_MERGE_OP1", SDT_AArch64Arith>;
 def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
 def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
 def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
@@ -222,7 +224,7 @@
   defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ", 1>;
   defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", 0>;
 
-  defm ADD_ZPZZ : sve_int_bin_pred_zx;
+  defm ADD_ZPZZ : sve_int_bin_pred_zx;
   defm SUB_ZPZZ : sve_int_bin_pred_zx;
   defm SUBR_ZPZZ : sve_int_bin_pred_zx;
 
@@ -279,10 +281,15 @@
   def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
             (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
 
-  defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_m1>;
-  defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_m1>;
-  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
-  defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;
+  defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ", 1>;
+  defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ", 1>;
+  defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", 0>;
+  defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", int_aarch64_sve_udivr, DestructiveBinaryCommWithRev, "UDIV_ZPmZ", 0>;
+
+  defm SDIV_ZPZZ : sve_int_bin_pred_zx_sd;
+  defm UDIV_ZPZZ : sve_int_bin_pred_zx_sd;
+  defm SDIVR_ZPZZ : sve_int_bin_pred_zx_sd;
+  defm UDIVR_ZPZZ : sve_int_bin_pred_zx_sd;
 
   defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
   defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
@@ -345,7 +352,7 @@
   defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>;
   defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>;
 
-  defm FADD_ZPZZ : sve_fp_2op_p_zds_zx;
+  defm FADD_ZPZZ : sve_fp_2op_p_zds_zx;
   defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx;
   defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx;
   defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2404,9 +2404,16 @@
 }
 
 // Special case for divides which are not defined for 8b/16b elements.
-multiclass sve_int_bin_pred_arit_2_div opc, string asm, SDPatternOperator op> {
-  def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
-  def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_2_div opc, string asm, string Ps,
+                                       SDPatternOperator op,
+                                       DestructiveInstTypeEnum flags,
+                                       string revname="", bit isOrig=0> {
+  let DestructiveInstType = flags in {
+  def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>,
+           SVEPseudo2Instr, SVEInstr2Rev;
+  def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>,
+           SVEPseudo2Instr, SVEInstr2Rev;
+  }
 
   def : SVE_3_Op_Pat(NAME # _S)>;
   def : SVE_3_Op_Pat(NAME # _D)>;
@@ -4888,6 +4895,21 @@
   def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>;
 }
 
+multiclass sve_int_bin_pred_zx_sd {
+  def _UNDEF_S : PredTwoOpPseudo;
+  def _UNDEF_D : PredTwoOpPseudo;
+
+  def _ZERO_S : PredTwoOpPseudo;
+  def _ZERO_D : PredTwoOpPseudo;
+
+  def : SVE_3_Op_Pat(NAME # _UNDEF_S)>;
+  def : SVE_3_Op_Pat(NAME # _UNDEF_D)>;
+
+  def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_S)>;
+  def : SVE_3_Op_Pat_SelZero(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_wide opc, string asm,
                                        SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
--- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -67,7 +67,7 @@
 ; CHECK-LABEL: srem_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    sub z0.s, z0.s, z2.s
@@ -80,7 +80,7 @@
 ; CHECK-LABEL: srem_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    sub z0.d, z0.d, z2.d
@@ -156,7 +156,7 @@
 ; CHECK-LABEL: urem_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    sub z0.s, z0.s, z2.s
@@ -169,7 +169,7 @@
 ; CHECK-LABEL: urem_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    sub z0.d, z0.d, z2.d
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
@@ -55,6 +55,62 @@
   ret %out
 }
 
+;
+; SDIV
+;
+
+define @sdiv_i32_zero( %pg, %a, %b) {
+; CHECK-LABEL: sdiv_i32_zero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.sdiv.nxv4i32( %pg,
+                                              %a_z,
+                                              %b)
+  ret %out
+}
+
+define @sdiv_i64_zero( %pg, %a, %b) {
+; CHECK-LABEL: sdiv_i64_zero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.sdiv.nxv2i64( %pg,
+                                              %a_z,
+                                              %b)
+  ret %out
+}
+
+;
+; SDIVR
+;
+
+define @sdivr_i32_zero( %pg, %a, %b) {
+; CHECK-LABEL: sdivr_i32_zero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.sdivr.nxv4i32( %pg,
+                                               %a_z,
+                                               %b)
+  ret %out
+}
+
+define @sdivr_i64_zero( %pg, %a, %b) {
+; CHECK-LABEL: sdivr_i64_zero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: sdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.sdivr.nxv2i64( %pg,
+                                               %a_z,
+                                               %b)
+  ret %out
+}
+
 ;
 ; SUB
 ;
@@ -159,11 +215,73 @@
   ret %out
 }
 
+;
+; UDIV
+;
+
+define @udiv_i32_zero( %pg, %a, %b) {
+; CHECK-LABEL: udiv_i32_zero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.udiv.nxv4i32( %pg,
+                                              %a_z,
+                                              %b)
+  ret %out
+}
+
+define @udiv_i64_zero( %pg, %a, %b) {
+; CHECK-LABEL: udiv_i64_zero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.udiv.nxv2i64( %pg,
+                                              %a_z,
+                                              %b)
+  ret %out
+}
+
+;
+; UDIVR
+;
+
+define @udivr_i32_zero( %pg, %a, %b) {
+; CHECK-LABEL: udivr_i32_zero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.udivr.nxv4i32( %pg,
+                                               %a_z,
+                                               %b)
+  ret %out
+}
+
+define @udivr_i64_zero( %pg, %a, %b) {
+; CHECK-LABEL: udivr_i64_zero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: udivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select %pg, %a, zeroinitializer
+  %out = call @llvm.aarch64.sve.udivr.nxv2i64( %pg,
+                                               %a_z,
+                                               %b)
+  ret %out
+}
+
 declare @llvm.aarch64.sve.add.nxv16i8(, , )
 declare @llvm.aarch64.sve.add.nxv8i16(, , )
 declare @llvm.aarch64.sve.add.nxv4i32(, , )
 declare @llvm.aarch64.sve.add.nxv2i64(, , )
 
+declare @llvm.aarch64.sve.sdiv.nxv4i32(, , )
+declare @llvm.aarch64.sve.sdiv.nxv2i64(, , )
+
+declare @llvm.aarch64.sve.sdivr.nxv4i32(, , )
+declare @llvm.aarch64.sve.sdivr.nxv2i64(, , )
+
 declare @llvm.aarch64.sve.sub.nxv16i8(, , )
 declare @llvm.aarch64.sve.sub.nxv8i16(, , )
 declare @llvm.aarch64.sve.sub.nxv4i32(, , )
@@ -173,3 +291,9 @@
 declare @llvm.aarch64.sve.subr.nxv8i16(, , )
 declare @llvm.aarch64.sve.subr.nxv4i32(, , )
 declare @llvm.aarch64.sve.subr.nxv2i64(, , )
+
+declare @llvm.aarch64.sve.udiv.nxv4i32(, , )
+declare @llvm.aarch64.sve.udiv.nxv2i64(, , )
+
+declare @llvm.aarch64.sve.udivr.nxv4i32(, , )
+declare @llvm.aarch64.sve.udivr.nxv2i64(, , )