Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -372,6 +372,7 @@ PMULDQ, // FMA nodes + FMARM, FMADD, FNMADD, FMSUB, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -16950,6 +16950,22 @@ Op.getOperand(2)), Op.getOperand(4), Op.getOperand(3), Subtarget, DAG); } + case INTR_TYPE_3OP_MASK_RM:{ + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + SDValue RoundingMode = Op.getOperand(5); + auto *SAE = cast(RoundingMode); + if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION){ + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3), + Mask, Src1, Subtarget, DAG); + } else { + SDValue fmaNode = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3); + return getVectorMaskingNode(DAG.getNode(X86ISD::FMARM, dl, VT, fmaNode, RoundingMode), + Mask, Src1, Subtarget, DAG); + } + } case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. @@ -17219,30 +17235,6 @@ return DAG.getNode(Opcode, dl, VTs, NewOps); } - case Intrinsic::x86_fma_mask_vfmadd_ps_512: - case Intrinsic::x86_fma_mask_vfmadd_pd_512: - case Intrinsic::x86_fma_mask_vfmsub_ps_512: - case Intrinsic::x86_fma_mask_vfmsub_pd_512: - case Intrinsic::x86_fma_mask_vfnmadd_ps_512: - case Intrinsic::x86_fma_mask_vfnmadd_pd_512: - case Intrinsic::x86_fma_mask_vfnmsub_ps_512: - case Intrinsic::x86_fma_mask_vfnmsub_pd_512: - case Intrinsic::x86_fma_mask_vfmaddsub_ps_512: - case Intrinsic::x86_fma_mask_vfmaddsub_pd_512: - case Intrinsic::x86_fma_mask_vfmsubadd_ps_512: - case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: { - auto *SAE = cast(Op.getOperand(5)); - if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION) - return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), - dl, Op.getValueType(), - Op.getOperand(1), - Op.getOperand(2), - Op.getOperand(3)), - Op.getOperand(4), Op.getOperand(1), - Subtarget, DAG); - else - return SDValue(); - } case Intrinsic::x86_fma_vfmadd_ps: case Intrinsic::x86_fma_vfmadd_pd: Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -3561,6 +3561,51 @@ defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>; let Constraints = "$src1 = $dst" in { +// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. +multiclass avx512_fma3_rm_rrb opc, string OpcodeStr, X86VectorVTInfo _, + SDPatternOperator OpNode = null_frag> { + defm rb: AVX512_maskable_3src, + AVX512FMA3Base, EVEX_B, EVEX_RC; + } +} // Constraints = "$src1 = $dst" + +multiclass avx512_fma3_rm_forms opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, X86VectorVTInfo VTI, + SDPatternOperator OpNode> { + defm v213r : avx512_fma3_rm_rrb, EVEX_CD8; + + defm v231r : avx512_fma3_rm_rrb, EVEX_CD8; + + defm v132r : avx512_fma3_rm_rrb, EVEX_CD8; +} + +multiclass avx512_fma3_rm opc213, bits<8> opc231, bits<8> opc132, + string OpcodeStr, + SDPatternOperator OpNode> { +let ExeDomain = SSEPackedSingle in { + defm NAME##PSZ : avx512_fma3_rm_forms, EVEX_V512; + } +let ExeDomain = SSEPackedDouble in { + defm NAME##PDZ : avx512_fma3_rm_forms, EVEX_V512, VEX_W; + } +} + +defm VFMADDRM : avx512_fma3_rm<0xA8, 0xB8, 0x98, "vfmadd", X86Fmadd>; +defm VFMSUBRM : avx512_fma3_rm<0xAA, 0xBA, 0x9A, "vfmsub", X86Fmsub>; +defm VFMADDSUBRM : avx512_fma3_rm<0xA6, 0xB6, 0x96, "vfmaddsub", X86Fmaddsub>; +defm VFMSUBADDRM : avx512_fma3_rm<0xA7, 0xB7, 0x97, "vfmsubadd", X86Fmsubadd>; +defm VFNMADDRM : avx512_fma3_rm<0xAC, 0xBC, 0x9C, "vfnmadd", X86Fnmadd>; +defm VFNMSUBRM : avx512_fma3_rm<0xAE, 0xBE, 0x9E, "vfnmsub", X86Fnmsub>; + +let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { let mayLoad = 1 in Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -203,6 +203,7 @@ def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; +def SDTFmaRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisVec<0>, SDTCisInt<2>]>; def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, @@ -258,6 +259,7 @@ def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>; +def X86FmaRm : SDNode<"X86ISD::FMARM", SDTFmaRm>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -21,7 +21,7 @@ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM, + INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND }; @@ -400,28 +400,40 @@ X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0), + X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0), + X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB , 0), X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE), X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),