Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -484,6 +484,45 @@ } def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; +def Write32: SchedWriteRes<[JSTC]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[Write32], (instregex "VCVTDQ2P(S|D)Yrr")>; +def : InstRW<[Write32], (instregex "VMOVNTP(S|D)Ymr")>; +def : InstRW<[Write32], (instregex "VROUNDYP(S|D)r")>; + +def Write32Ld: SchedWriteRes<[JLAGU, JSTC]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[Write32Ld, ReadAfterLd], (instregex "VCVTDQ2P(S|D)Yrm")>; +def : InstRW<[Write32Ld, ReadAfterLd], (instregex "VROUNDYP(S|D)m")>; + +def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> { + let Latency = 6; + let ResourceCycles = [2, 2]; +} +def : InstRW<[WriteVCVTPDY], (instregex "VCVTPD2(DQ|PS)Yrr")>; + +def WriteVCVTPDYLd: SchedWriteRes<[JLAGU, JSTC, JFPU01]> { + let Latency = 11; + let ResourceCycles = [1, 2, 2]; +} +def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>; + +def WriteVCVTPSY: SchedWriteRes<[JSTC]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVCVTPSY], (instregex "VCVTPS2DQYrr")>; + +def WriteVCVTPSYLd: SchedWriteRes<[JLAGU, JSTC]> { + let Latency = 11; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVCVTPSYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>; + def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { let Latency = 54; let ResourceCycles = [54]; Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -1090,8 +1090,8 @@ ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1155,8 +1155,8 @@ ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1281,8 +1281,8 @@ ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00] +; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00] ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2719,7 +2719,7 @@ ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntpd: @@ -2772,7 +2772,7 @@ ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntps: @@ -3925,8 +3925,8 @@ ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3989,8 +3989,8 @@ ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ;