Index: ../llvmOrg/lib/Target/X86/X86ISelLowering.h
===================================================================
--- ../llvmOrg/lib/Target/X86/X86ISelLowering.h
+++ ../llvmOrg/lib/Target/X86/X86ISelLowering.h
@@ -476,6 +476,7 @@
 
       // ERI instructions
       RSQRT28, RCP28, EXP2,
+      RSQRT14, RCP14,
 
       // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
Index: ../llvmOrg/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- ../llvmOrg/lib/Target/X86/X86ISelLowering.cpp
+++ ../llvmOrg/lib/Target/X86/X86ISelLowering.cpp
@@ -15448,6 +15448,14 @@
     return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask,
                                 PassThru, Subtarget, DAG);
   }
+  case INTR_TYPE_SCALAR_MASK: {
+    SDValue Src1 = Op.getOperand(1);
+    SDValue Src2 = Op.getOperand(2);
+    SDValue passThru = Op.getOperand(3);
+    SDValue Mask = Op.getOperand(4);
+    return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
+                                Mask, passThru, Subtarget, DAG);
+  }
   case INTR_TYPE_SCALAR_MASK_RM: {
     SDValue Src1 = Op.getOperand(1);
     SDValue Src2 = Op.getOperand(2);
Index: ../llvmOrg/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- ../llvmOrg/lib/Target/X86/X86InstrAVX512.td
+++ ../llvmOrg/lib/Target/X86/X86InstrAVX512.td
@@ -5070,50 +5070,31 @@
 }
 
 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
-multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                          X86MemOperand x86memop> {
+multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                         X86VectorVTInfo _> {
   let hasSideEffects = 0 in {
-  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
-            (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr,
-            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-            []>, EVEX_4V;
+  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+                           "$src2, $src1", "$src1, $src2",
+                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
   let mayLoad = 1 in {
-  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
-            (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
-            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-            []>, EVEX_4V;
+  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+                         "$src2, $src1", "$src1, $src2",
+                         (OpNode (_.VT _.RC:$src1),
+                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
   }
 }
 }
-defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
-                  EVEX_CD8<32, CD8VT1>;
-defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
-                  VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
-                  EVEX_CD8<32, CD8VT1>;
-defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
-                  VEX_W, EVEX_CD8<64, CD8VT1>;
-
-def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
-                  (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
-           (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
-                       (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
-                  (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
-           (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
-                       (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
-
-def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
-                  (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
-           (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
-                       (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
-                  (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
-           (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
-                       (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
+defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
+                  EVEX_CD8<32, CD8VT1>, T8PD;
+defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
+                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
+defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
+                  EVEX_CD8<32, CD8VT1>, T8PD;
+defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
+                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
 
 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5416,15 +5397,14 @@
           Requires<[OptForSize]>;
 
 def : Pat<(f32 (X86frsqrt FR32X:$src)),
-          (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+          (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
 def : Pat<(f32 (X86frsqrt (load addr:$src))),
-          (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+          (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
           Requires<[OptForSize]>;
-
 def : Pat<(f32 (X86frcp FR32X:$src)),
-          (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+          (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
 def : Pat<(f32 (X86frcp (load addr:$src))),
-          (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+          (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
           Requires<[OptForSize]>;
 
 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
Index: ../llvmOrg/lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- ../llvmOrg/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ ../llvmOrg/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -58,6 +58,8 @@
                                    [SDNPCommutative, SDNPAssociative]>;
 def X86frsqrt  : SDNode<"X86ISD::FRSQRT",     SDTFPUnaryOp>;
 def X86frcp    : SDNode<"X86ISD::FRCP",       SDTFPUnaryOp>;
+def X86frsqrt14s: SDNode<"X86ISD::RSQRT14",   SDTFPBinOp>;
+def X86frcp14s : SDNode<"X86ISD::RCP14",      SDTFPBinOp>;
 def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
 def X86fhadd   : SDNode<"X86ISD::FHADD",      SDTFPBinOp>;
 def X86fhsub   : SDNode<"X86ISD::FHSUB",      SDTFPBinOp>;
Index: ../llvmOrg/lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- ../llvmOrg/lib/Target/X86/X86IntrinsicsInfo.h
+++ ../llvmOrg/lib/Target/X86/X86IntrinsicsInfo.h
@@ -24,7 +24,7 @@
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
   INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM,
   FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
-  VPERM_3OP_MASKZ,
+  VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
   INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
   EXPAND_FROM_MEM, BLEND
@@ -1376,10 +1376,14 @@
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
                      X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14, 0),
+  X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+  X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14, 0),
+  X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
Index: ../llvmOrg/test/MC/X86/avx512-encodings.s
===================================================================
--- ../llvmOrg/test/MC/X86/avx512-encodings.s
+++ ../llvmOrg/test/MC/X86/avx512-encodings.s
@@ -14958,3 +14958,146 @@
 // CHECK: vgetexpsd -1032(%rdx), %xmm7, %xmm2
 // CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
           vgetexpsd -1032(%rdx), %xmm7, %xmm2
+// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x08,0x4f,0xd2]
+          vrsqrt14sd %xmm10, %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5}
+// CHECK: encoding: [0x62,0x42,0xcd,0x0d,0x4f,0xd2]
+          vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5}
+
+// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x42,0xcd,0x8d,0x4f,0xd2]
+          vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5} {z}
+
+// CHECK: vrsqrt14sd (%rcx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x11]
+          vrsqrt14sd (%rcx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd 291(%rax,%r14,8), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x22,0xcd,0x08,0x4f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14sd 291(%rax,%r14,8), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd 1016(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x52,0x7f]
+          vrsqrt14sd 1016(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd 1024(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x92,0x00,0x04,0x00,0x00]
+          vrsqrt14sd 1024(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd -1024(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x52,0x80]
+          vrsqrt14sd -1024(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd -1032(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x92,0xf8,0xfb,0xff,0xff]
+          vrsqrt14sd -1032(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x52,0x0d,0x08,0x4f,0xf1]
+          vrsqrt14ss %xmm9, %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1}
+// CHECK: encoding: [0x62,0x52,0x0d,0x09,0x4f,0xf1]
+          vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1}
+
+// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1} {z}
+// CHECK: encoding: [0x62,0x52,0x0d,0x89,0x4f,0xf1]
+          vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1} {z}
+
+// CHECK: vrsqrt14ss (%rcx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x31]
+          vrsqrt14ss (%rcx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss 291(%rax,%r14,8), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x32,0x0d,0x08,0x4f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14ss 291(%rax,%r14,8), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss 508(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x72,0x7f]
+          vrsqrt14ss 508(%rdx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss 512(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0xb2,0x00,0x02,0x00,0x00]
+          vrsqrt14ss 512(%rdx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss -512(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x72,0x80]
+          vrsqrt14ss -512(%rdx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss -516(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0xb2,0xfc,0xfd,0xff,0xff]
+          vrsqrt14ss -516(%rdx), %xmm14, %xmm14
+
+// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x52,0xcd,0x00,0x4d,0xe6]
+          vrcp14sd %xmm14, %xmm22, %xmm12
+
+// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12 {%k2}
+// CHECK: encoding: [0x62,0x52,0xcd,0x02,0x4d,0xe6]
+          vrcp14sd %xmm14, %xmm22, %xmm12 {%k2}
+
+// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12 {%k2} {z}
+// CHECK: encoding: [0x62,0x52,0xcd,0x82,0x4d,0xe6]
+          vrcp14sd %xmm14, %xmm22, %xmm12 {%k2} {z}
+
+// CHECK: vrcp14sd (%rcx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x21]
+          vrcp14sd (%rcx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd 291(%rax,%r14,8), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x32,0xcd,0x00,0x4d,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14sd 291(%rax,%r14,8), %xmm22, %xmm12
+
+// CHECK: vrcp14sd 1016(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x62,0x7f]
+          vrcp14sd 1016(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd 1024(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0xa2,0x00,0x04,0x00,0x00]
+          vrcp14sd 1024(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd -1024(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x62,0x80]
+          vrcp14sd -1024(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd -1032(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0xa2,0xf8,0xfb,0xff,0xff]
+          vrcp14sd -1032(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0xc3]
+          vrcp14ss %xmm3, %xmm8, %xmm8
+
+// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8 {%k7}
+// CHECK: encoding: [0x62,0x72,0x3d,0x0f,0x4d,0xc3]
+          vrcp14ss %xmm3, %xmm8, %xmm8 {%k7}
+
+// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8 {%k7} {z}
+// CHECK: encoding: [0x62,0x72,0x3d,0x8f,0x4d,0xc3]
+          vrcp14ss %xmm3, %xmm8, %xmm8 {%k7} {z}
+
+// CHECK: vrcp14ss (%rcx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x01]
+          vrcp14ss (%rcx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss 291(%rax,%r14,8), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x32,0x3d,0x08,0x4d,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14ss 291(%rax,%r14,8), %xmm8, %xmm8
+
+// CHECK: vrcp14ss 508(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x42,0x7f]
+          vrcp14ss 508(%rdx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss 512(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0x00,0x02,0x00,0x00]
+          vrcp14ss 512(%rdx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss -512(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x42,0x80]
+          vrcp14ss -512(%rdx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss -516(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0xfc,0xfd,0xff,0xff]
+          vrcp14ss -516(%rdx), %xmm8, %xmm8
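For reference only (not part of the diff): a minimal C sketch of how the masked scalar intrinsics handled by the new INTR_TYPE_SCALAR_MASK path are reached from user code. It assumes clang with -mavx512f and the standard <immintrin.h> names (_mm_mask_rcp14_ss, _mm_maskz_rsqrt14_sd), which clang is expected to lower to the llvm.x86.avx512.rcp14.ss / llvm.x86.avx512.rsqrt14.sd intrinsics mapped above; the comments restate the merge/zero masking that getScalarMaskingNode applies.

// Illustrative sketch; assumes clang -mavx512f and the standard AVX-512F
// scalar rcp14/rsqrt14 intrinsics from <immintrin.h>.
#include <immintrin.h>

// Merge-masked reciprocal approximation:
//   dst[0] = k[0] ? ~1/b[0] (14-bit accurate) : src[0];  dst[1..3] = a[1..3].
__m128 rcp14_ss_mask(__m128 src, __mmask8 k, __m128 a, __m128 b) {
  return _mm_mask_rcp14_ss(src, k, a, b);
}

// Zero-masked reciprocal square-root approximation:
//   dst[0] = k[0] ? ~1/sqrt(b[0]) : 0.0;  dst[1] = a[1].
__m128d rsqrt14_sd_maskz(__mmask8 k, __m128d a, __m128d b) {
  return _mm_maskz_rsqrt14_sd(k, a, b);
}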