diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -90,6 +90,15 @@ UMAX_PRED, UMIN_PRED, + // Predicated instructions with the result of inactive lanes provided by the + // last operand. + SXTB_MERGE_PASSTHRU, + SXTH_MERGE_PASSTHRU, + SXTW_MERGE_PASSTHRU, + UXTB_MERGE_PASSTHRU, + UXTH_MERGE_PASSTHRU, + UXTW_MERGE_PASSTHRU, + SETCC_MERGE_ZERO, // Arithmetic instructions which write flags. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1415,6 +1415,12 @@ MAKE_CASE(AArch64ISD::UDIV_PRED) MAKE_CASE(AArch64ISD::UMAX_PRED) MAKE_CASE(AArch64ISD::UMIN_PRED) + MAKE_CASE(AArch64ISD::SXTB_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::SXTH_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::SXTW_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::UXTB_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::UXTH_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::UXTW_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) MAKE_CASE(AArch64ISD::SBC) @@ -3242,6 +3248,15 @@ case Intrinsic::aarch64_sve_rev: return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(), Op.getOperand(1)); + case Intrinsic::aarch64_sve_sxtb: + return DAG.getNode(AArch64ISD::SXTB_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_sxth: + return DAG.getNode(AArch64ISD::SXTH_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_sxtw: + return DAG.getNode(AArch64ISD::SXTW_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_tbl: return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), 
Op.getOperand(1), Op.getOperand(2)); @@ -3251,6 +3266,15 @@ case Intrinsic::aarch64_sve_trn2: return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_uxtb: + return DAG.getNode(AArch64ISD::UXTB_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_uxth: + return DAG.getNode(AArch64ISD::UXTH_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_uxtw: + return DAG.getNode(AArch64ISD::UXTW_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_uzp1: return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -164,12 +164,12 @@ def SDT_AArch64Arith : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3> ]>; def SDT_AArch64FMA : SDTypeProfile<1, 4, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4> + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4> ]>; // Predicated operations with the result of inactive lanes being unspecified. 
@@ -190,6 +190,19 @@ def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; +def SDT_AArch64OneOpPassthru : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3> +]>; + +// Predicated operations with the result of inactive lanes provided by the last operand. +def AArch64sxtb_mt : SDNode<"AArch64ISD::SXTB_MERGE_PASSTHRU", SDT_AArch64OneOpPassthru>; +def AArch64sxth_mt : SDNode<"AArch64ISD::SXTH_MERGE_PASSTHRU", SDT_AArch64OneOpPassthru>; +def AArch64sxtw_mt : SDNode<"AArch64ISD::SXTW_MERGE_PASSTHRU", SDT_AArch64OneOpPassthru>; +def AArch64uxtb_mt : SDNode<"AArch64ISD::UXTB_MERGE_PASSTHRU", SDT_AArch64OneOpPassthru>; +def AArch64uxth_mt : SDNode<"AArch64ISD::UXTH_MERGE_PASSTHRU", SDT_AArch64OneOpPassthru>; +def AArch64uxtw_mt : SDNode<"AArch64ISD::UXTW_MERGE_PASSTHRU", SDT_AArch64OneOpPassthru>; + def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; @@ -312,12 +325,12 @@ defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; - defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", int_aarch64_sve_sxtb>; - defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", int_aarch64_sve_uxtb>; - defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", int_aarch64_sve_sxth>; - defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", int_aarch64_sve_uxth>; - defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", int_aarch64_sve_sxtw>; - defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", int_aarch64_sve_uxtw>; + defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxtb_mt>; + defm UXTB_ZPmZ : 
sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxtb_mt>; + defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxth_mt>; + defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxth_mt>; + defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxtw_mt>; + defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxtw_mt>; defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>; defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -305,6 +305,11 @@ : Pat<(vtd (op vt1:$Op1)), (inst $Op1)>; +class SVE_1_Op_Passthru +: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + class SVE_1_Op_Imm_OptLsl_Reverse_Pat : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -3688,9 +3693,9 @@ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_1_Op_Passthru(NAME # _H)>; + def : SVE_1_Op_Passthru(NAME # _S)>; + def : SVE_1_Op_Passthru(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_w opc, string asm, @@ -3698,15 +3703,15 @@ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_1_Op_Passthru(NAME # _S)>; + def : SVE_1_Op_Passthru(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_d opc, string asm, SDPatternOperator op> { def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_1_Op_Passthru(NAME # _D)>; } multiclass sve_int_un_pred_arit_1 opc, string asm,