Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -127,10 +127,8 @@ // Scalar and vector floating point. defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; // 10-14 cycles. defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -138,6 +136,24 @@ defm : HWWriteResPair; defm : HWWriteResPair; +def : WriteRes { + let Latency = 12; // 10-14 cycles. + let ResourceCycles = [12]; +} +def : WriteRes { + let Latency = 16; // load + 10-14 cycles. + let ResourceCycles = [1, 12]; +} + +def : WriteRes { + let Latency = 15; + let ResourceCycles = [15]; +} +def : WriteRes { + let Latency = 19; + let ResourceCycles = [1, 15]; +} + def : WriteRes { let Latency = 2; let ResourceCycles = [2]; @@ -1906,7 +1922,7 @@ def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> { let Latency = 19; // 18-21 cycles. let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2, 19]; } def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>; @@ -1914,7 +1930,7 @@ def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { let Latency = 23; // 18-21 + 4 cycles. let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; + let ResourceCycles = [2, 19, 1]; // Positional: HWPort0, HWPort15, HWPort23; 19 stays on HWPort15 as in the rr form. } def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>; @@ -1923,7 +1939,7 @@ def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> { let Latency = 27; // 19-35 cycles. let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2, 27]; } def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>; @@ -1931,7 +1947,7 @@ def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { let Latency = 31; // 19-35 + 4 cycles. 
let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; + let ResourceCycles = [2, 27, 1]; // Positional: HWPort0, HWPort15, HWPort23; 27 stays on HWPort15 as in the rr form. } def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>; @@ -2044,7 +2060,7 @@ def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { let Latency = 19; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2, 19]; } def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>; @@ -2052,7 +2068,7 @@ def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { let Latency = 23; let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; + let ResourceCycles = [2, 19, 1]; // Positional: HWPort0, HWPort15, HWPort23; 19 stays on HWPort15 as in the r form. } def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>; @@ -2061,7 +2077,7 @@ def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> { let Latency = 28; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2, 28]; } def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>; @@ -2069,7 +2085,7 @@ def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { let Latency = 32; let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; + let ResourceCycles = [2, 28, 1]; // Positional: HWPort0, HWPort15, HWPort23; 28 (the r-form count, not the 32-cycle latency) on HWPort15. } def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -115,15 +115,32 @@ // Scalar and vector floating point. defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; // 10-14 cycles. defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; + +def : WriteRes { + let Latency = 12; // 10-14 cycles. + let ResourceCycles = [12]; +} +def : WriteRes { + let Latency = 16; // load + 10-14 cycles. 
+ let ResourceCycles = [1, 12]; +} + +def : WriteRes { + let Latency = 15; + let ResourceCycles = [15]; +} +def : WriteRes { + let Latency = 19; + let ResourceCycles = [1, 15]; +} + def : WriteRes { let Latency = 2; let ResourceCycles = [1, 1]; Index: test/CodeGen/X86/recip-fastmath.ll =================================================================== --- test/CodeGen/X86/recip-fastmath.ll +++ test/CodeGen/X86/recip-fastmath.ll @@ -46,13 +46,13 @@ ; SANDY-LABEL: f32_no_estimate: ; SANDY: # BB#0: ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_no_estimate: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:12.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_no_estimate: @@ -64,7 +64,7 @@ ; AVX512-LABEL: f32_no_estimate: ; AVX512: # BB#0: ; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:12.00] ; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 1.0, %x ret float %div @@ -285,13 +285,13 @@ ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # BB#0: ; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # BB#0: ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] -; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: vdivps %xmm0, %xmm1, 
%xmm0 # sched: [12:12.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate: @@ -303,7 +303,7 @@ ; AVX512-LABEL: v4f32_no_estimate: ; AVX512: # BB#0: ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] -; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:12.00] ; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div @@ -547,13 +547,13 @@ ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # BB#0: ; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # BB#0: ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] -; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] +; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:9.50] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate: @@ -565,7 +565,7 @@ ; AVX512-LABEL: v8f32_no_estimate: ; AVX512: # BB#0: ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] -; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] +; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:9.50] ; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -714,14 +714,14 @@ ; ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; SANDY-NEXT: 
vdivps (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divps: @@ -756,14 +756,14 @@ ; ; SANDY-LABEL: test_divss: ; SANDY: # BB#0: -; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divss: @@ -1962,15 +1962,15 @@ ; ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:15.00] +; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:15.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] -; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:15.00] +; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:15.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; @@ -2017,17 +2017,17 @@ ; ; 
SANDY-LABEL: test_sqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:15.00] ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:15.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:15.00] ; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:15.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -1234,14 +1234,14 @@ ; ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divpd: @@ -1276,14 +1276,14 @@ ; ; SANDY-LABEL: test_divsd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 
# sched: [16:1.00] +; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:12.00] +; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:12.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divsd: @@ -5619,15 +5619,15 @@ ; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:15.00] +; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:15.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] -; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:15.00] +; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:15.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; @@ -5674,17 +5674,17 @@ ; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:15.00] ; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:15.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:15.00] ; HASWELL-NEXT: 
vmovapd (%rdi), %xmm1 # sched: [4:0.50] -; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:15.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ;