Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -3337,6 +3337,16 @@ >; } +let Sched = WriteFRsqrt in { +def SSE_RSQRTPS : OpndItins< + IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM +>; + +def SSE_RSQRTSS : OpndItins< + IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM +>; +} + let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM @@ -3615,10 +3625,10 @@ // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>, +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>; + int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>; defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -129,6 +129,7 @@ defm : HWWriteResPair; defm : HWWriteResPair; // 10-14 cycles. defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -117,6 +117,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; // 10-14 cycles. defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -63,12 +63,13 @@ defm WriteJump : X86SchedWritePair; // Floating point. This covers both scalar and vector operations. -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. -defm WriteFMul : X86SchedWritePair; // Floating point multiplication. -defm WriteFDiv : X86SchedWritePair; // Floating point division. -defm WriteFSqrt : X86SchedWritePair; // Floating point square root. -defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. -defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. @@ -314,6 +315,11 @@ def IIC_SSE_SQRTSD_RR : InstrItinClass; def IIC_SSE_SQRTSD_RM : InstrItinClass; +def IIC_SSE_RSQRTPS_RR : InstrItinClass; +def IIC_SSE_RSQRTPS_RM : InstrItinClass; +def IIC_SSE_RSQRTSS_RR : InstrItinClass; +def IIC_SSE_RSQRTSS_RM : InstrItinClass; + def IIC_SSE_RCPP_RR : InstrItinClass; def IIC_SSE_RCPP_RM : InstrItinClass; def IIC_SSE_RCPS_RR : InstrItinClass; Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -224,6 +224,11 @@ InstrItinData] >, InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -163,15 +163,15 @@ // FIXME: should we bother splitting JFPU pipe + unit stages for fast instructions? // FIXME: Double precision latencies // FIXME: SS vs PS latencies -// FIXME: RSQRT latencies // FIXME: ymm latencies //////////////////////////////////////////////////////////////////////////////// -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; -defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; def : WriteRes { Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -101,6 +101,7 @@ // Scalar and vector floating point. defm : SMWriteResPair; defm : SMWriteResPair; +defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair;