Index: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
@@ -573,6 +573,104 @@
 def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>;
 def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTTPD2DQYrm")>;
 
+// AVX scheduling overrides. Each SchedWriteRes below gives a latency and the
+// cycles consumed on every listed resource; the InstRW that follows binds it
+// to the opcodes matched by its regexes.
+def WriteVBlendVPY: SchedWriteRes<[JFPU01]> {
+  let Latency = 3;
+  let ResourceCycles = [6];
+}
+def : InstRW<[WriteVBlendVPY], (instregex "VBLENDVP(S|D)Yrr", "VPERMILP(D|S)Yrr")>;
+
+def WriteVBlendVPYLd: SchedWriteRes<[JLAGU, JFPU01]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 6];
+}
+def : InstRW<[WriteVBlendVPYLd, ReadAfterLd], (instregex "VBLENDVP(S|D)Yrm")>;
+
+// The broadcast loads have no register source, so no ReadAfterLd is listed:
+// it would be applied to the address operands instead.
+def WriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 4];
+}
+def : InstRW<[WriteVBROADCASTYLd], (instregex "VBROADCASTS(S|D)Yrm")>;
+
+def WriteFPAY22: SchedWriteRes<[JFPU0]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>;
+
+def WriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 7;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
+
+def WriteVHAddSubY: SchedWriteRes<[JFPU0]> {
+  let Latency = 3;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteVHAddSubY], (instregex "VH(ADD|SUB)P(D|S)Yrr")>;
+
+def WriteVHAddSubYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteVHAddSubYLd, ReadAfterLd], (instregex "VH(ADD|SUB)P(D|S)Yrm")>;
+
+def WriteVMaskMovLd: SchedWriteRes<[JLAGU,JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteVMaskMovLd], (instregex "VMASKMOVP(D|S)rm")>;
+
+def WriteVMaskMovYLd: SchedWriteRes<[JLAGU,JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 4];
+}
+def : InstRW<[WriteVMaskMovYLd], (instregex "VMASKMOVP(D|S)Yrm")>;
+
+def WriteVMaskMovSt: SchedWriteRes<[JFPU01,JSAGU]> {
+  let Latency = 6;
+  let ResourceCycles = [4, 1];
+}
+def : InstRW<[WriteVMaskMovSt], (instregex "VMASKMOVP(D|S)mr")>;
+
+def WriteVMaskMovYSt: SchedWriteRes<[JFPU01,JSAGU]> {
+  let Latency = 6;
+  let ResourceCycles = [4, 1];
+}
+def : InstRW<[WriteVMaskMovYSt], (instregex "VMASKMOVP(D|S)Ymr")>;
+
+// TODO: In fact we have latency '2+i'. The +i represents an additional 1 cycle transfer
+// operation which moves the floating point result to the integer unit. During this
+// additional cycle the floating point unit execution resources are not occupied
+// and ALU0 in the integer unit is occupied instead.
+def WriteVMOVMSK: SchedWriteRes<[JFPU0]> {
+  let Latency = 3;
+}
+def : InstRW<[WriteVMOVMSK], (instregex "VMOVMSKP(D|S)(Y)?rr")>;
+
+// TODO: In fact we have latency '3+i'. The +i represents an additional 1 cycle transfer
+// operation which moves the floating point result to the integer unit. During this
+// additional cycle the floating point unit execution resources are not occupied
+// and ALU0 in the integer unit is occupied instead.
+def WriteVTESTY: SchedWriteRes<[JFPU01, JFPU0]> {
+  let Latency = 4;
+  let ResourceCycles = [4, 2];
+}
+def : InstRW<[WriteVTESTY], (instregex "VTESTP(S|D)Yrr")>;
+def : InstRW<[WriteVTESTY], (instregex "VPTESTYrr")>;
+
+def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> {
+  let Latency = 9;
+  let ResourceCycles = [1, 4, 2];
+}
+def : InstRW<[WriteVTESTYLd, ReadAfterLd], (instregex "VTESTP(S|D)Yrm")>;
+def : InstRW<[WriteVTESTYLd, ReadAfterLd], (instregex "VPTESTYrm")>;
+
 def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
   let Latency = 54;
   let ResourceCycles = [54];
Index: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll
@@ -655,8 +655,8 @@
 ;
 ; BTVER2-LABEL: test_blendvpd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
+; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_blendvpd:
@@ -710,8 +710,8 @@
 ;
 ; BTVER2-LABEL: test_blendvps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
+; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_blendvps:
@@ -804,7 +804,7 @@
 ;
 ; BTVER2-LABEL: test_broadcastsd_ymm:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_broadcastsd_ymm:
@@ -896,7 +896,7 @@
 ;
 ; BTVER2-LABEL: test_broadcastss_ymm:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_broadcastss_ymm:
@@ -956,8 +956,8 @@
 ;
 ; BTVER2-LABEL: test_cmppd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
 ; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -1024,8 +1024,8 @@
 ;
 ; BTVER2-LABEL: test_cmpps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
 ; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -2089,8 +2089,8 @@
 ;
 ; BTVER2-LABEL: test_maskmovpd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2
-; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi)
+; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
+; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
 ; BTVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -2152,8 +2152,8 @@
 ;
 ; BTVER2-LABEL: test_maskmovpd_ymm:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
-; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi)
+; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
+; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
 ; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -2215,8 +2215,8 @@
 ;
 ; BTVER2-LABEL: test_maskmovps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
-; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
+; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
+; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
 ; BTVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -2278,8 +2278,8 @@
 ;
 ; BTVER2-LABEL: test_maskmovps_ymm:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
-; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi)
+; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
+; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
 ; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -2335,8 +2335,8 @@
 ;
 ; BTVER2-LABEL: test_maxpd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_maxpd:
@@ -2390,8 +2390,8 @@
 ;
 ; BTVER2-LABEL: test_maxps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_maxps:
@@ -2445,8 +2445,8 @@
 ;
 ; BTVER2-LABEL: test_minpd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_minpd:
@@ -2500,8 +2500,8 @@
 ;
 ; BTVER2-LABEL: test_minps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_minps:
@@ -2742,7 +2742,7 @@
 ;
 ; BTVER2-LABEL: test_movmskpd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movmskpd:
@@ -2794,7 +2794,7 @@
 ;
 ; BTVER2-LABEL: test_movmskps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movmskps:
@@ -3818,7 +3818,7 @@
 ;
 ; BTVER2-LABEL: test_permilvarpd_ymm:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
 ; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -3928,7 +3928,7 @@
 ;
 ; BTVER2-LABEL: test_permilvarps_ymm:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
 ; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -4697,9 +4697,9 @@
 ; BTVER2-LABEL: test_testpd_ymm:
 ; BTVER2: # BB#0:
 ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
-; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:3.00]
 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:3.00]
 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
@@ -4864,9 +4864,9 @@
 ; BTVER2-LABEL: test_testps_ymm:
 ; BTVER2: # BB#0:
 ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
-; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:3.00]
 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00]
+; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:3.00]
 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
Index: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll
@@ -1942,7 +1942,7 @@
 ;
 ; BTVER2-LABEL: test_movmskps:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movmskps:
Index: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
@@ -3179,7 +3179,7 @@
 ;
 ; BTVER2-LABEL: test_movmskpd:
 ; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movmskpd: