Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -254,7 +254,9 @@ /// Note that these typically require refinement /// in order to obtain suitable precision. FRSQRT, FRCP, - FRSQRTS, FRCPS, + + // AVX-512 reciprocal approximations with a little more precision. + RSQRT14, RSQRT14S, RCP14, RCP14S, // Thread Local Storage. TLSADDR, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -24786,9 +24786,7 @@ case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; - case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS"; case X86ISD::FRCP: return "X86ISD::FRCP"; - case X86ISD::FRCPS: return "X86ISD::FRCPS"; case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; @@ -24966,9 +24964,13 @@ case X86ISD::SELECT: return "X86ISD::SELECT"; case X86ISD::SELECTS: return "X86ISD::SELECTS"; case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; + case X86ISD::RCP14: return "X86ISD::RCP14"; + case X86ISD::RCP14S: return "X86ISD::RCP14S"; case X86ISD::RCP28: return "X86ISD::RCP28"; case X86ISD::RCP28S: return "X86ISD::RCP28S"; case X86ISD::EXP2: return "X86ISD::EXP2"; + case X86ISD::RSQRT14: return "X86ISD::RSQRT14"; + case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -7362,13 +7362,13 @@ } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>, +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd @@ -7414,8 +7414,8 @@ } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -56,8 +56,6 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>; -def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; @@ -498,10 +496,14 @@ def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>; def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>; +def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>; +def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>; def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>; def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>; def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>; +def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>; +def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>; def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>; def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>; def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImmRound>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -3252,9 +3252,9 @@ // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >; + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX]>; defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>; + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX]>; // There is no f64 version of the reciprocal approximation instructions. Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -1428,26 +1428,26 @@ X86_INTRINSIC_DATA(avx512_ptestnm_w_128, CMP_MASK, X86ISD::TESTNM, 0), X86_INTRINSIC_DATA(avx512_ptestnm_w_256, CMP_MASK, X86ISD::TESTNM, 0), X86_INTRINSIC_DATA(avx512_ptestnm_w_512, CMP_MASK, X86ISD::TESTNM, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), + X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0), X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0), Index: test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-x86.ll +++ test/CodeGen/X86/avx-intrinsics-x86.ll @@ -581,15 +581,10 @@ define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { -; AVX-LABEL: test_x86_avx_rcp_ps_256: -; AVX: # BB#0: -; AVX-NEXT: vrcpps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x53,0xc0] -; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx_rcp_ps_256: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vrcp14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0] -; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; CHECK-LABEL: test_x86_avx_rcp_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vrcpps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x53,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res } @@ -619,15 +614,10 @@ define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { -; AVX-LABEL: test_x86_avx_rsqrt_ps_256: -; AVX: # BB#0: -; AVX-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0] -; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx_rsqrt_ps_256: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vrsqrt14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0] -; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; CHECK-LABEL: test_x86_avx_rsqrt_ps_256: +; CHECK: # BB#0: +; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res } Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -3982,8 +3982,8 @@ ; ; SKX-LABEL: test_rcpps: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vrcp14ps (%rdi), %ymm1 # sched: [11:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4174,8 +4174,8 @@ ; ; SKX-LABEL: test_rsqrtps: ; SKX: # BB#0: -; SKX-NEXT: vrsqrt14ps %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vrsqrt14ps (%rdi), %ymm1 # sched: [11:1.00] +; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; Index: test/CodeGen/X86/recip-fastmath.ll =================================================================== --- test/CodeGen/X86/recip-fastmath.ll +++ test/CodeGen/X86/recip-fastmath.ll @@ -416,7 +416,7 @@ ; ; SKX-LABEL: v4f32_one_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -533,7 +533,7 @@ ; ; SKX-LABEL: v4f32_two_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [4:0.33] @@ -691,7 +691,7 @@ ; ; SKX-LABEL: v8f32_one_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -821,7 +821,7 @@ ; ; SKX-LABEL: v8f32_two_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [4:0.33] Index: test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- test/CodeGen/X86/recip-fastmath2.ll +++ test/CodeGen/X86/recip-fastmath2.ll @@ -478,7 +478,7 @@ ; ; SKX-LABEL: v4f32_one_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] @@ -580,7 +580,7 @@ ; ; SKX-LABEL: v4f32_one_step_2_divs: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] @@ -708,7 +708,7 @@ ; ; SKX-LABEL: v4f32_two_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [4:0.33] @@ -814,7 +814,7 @@ ; ; SKX-LABEL: v8f32_one_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] @@ -925,7 +925,7 @@ ; ; SKX-LABEL: v8f32_one_step_2_divs: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50] @@ -1067,7 +1067,7 @@ ; ; SKX-LABEL: v8f32_two_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [4:0.33] @@ -1124,7 +1124,7 @@ ; ; SKX-LABEL: v8f32_no_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div @@ -1183,7 +1183,7 @@ ; ; SKX-LABEL: v8f32_no_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x Index: test/CodeGen/X86/sse-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse-intrinsics-x86.ll +++ test/CodeGen/X86/sse-intrinsics-x86.ll @@ -401,15 +401,10 @@ ; SSE-NEXT: rcpps %xmm0, %xmm0 ## encoding: [0x0f,0x53,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_sse_rcp_ps: -; AVX2: ## BB#0: -; AVX2-NEXT: vrcpps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x53,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] -; -; SKX-LABEL: test_x86_sse_rcp_ps: -; SKX: ## BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x4c,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] +; VCHECK-LABEL: test_x86_sse_rcp_ps: +; VCHECK: ## BB#0: +; VCHECK-NEXT: vrcpps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x53,0xc0] +; VCHECK-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -438,15 +433,10 @@ ; SSE-NEXT: rsqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x52,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_sse_rsqrt_ps: -; AVX2: ## BB#0: -; AVX2-NEXT: vrsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x52,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] -; -; SKX-LABEL: test_x86_sse_rsqrt_ps: -; SKX: ## BB#0: -; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x4e,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] +; VCHECK-LABEL: test_x86_sse_rsqrt_ps: +; VCHECK: ## BB#0: +; VCHECK-NEXT: vrsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x52,0xc0] +; VCHECK-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -2547,8 +2547,8 @@ ; ; SKX-LABEL: test_rcpps: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vrcp14ps (%rdi), %xmm1 # sched: [10:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2719,8 +2719,8 @@ ; ; SKX-LABEL: test_rsqrtps: ; SKX: # BB#0: -; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vrsqrt14ps (%rdi), %xmm1 # sched: [10:1.00] +; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ;