Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -438,6 +438,173 @@ } def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; +def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> { + let Latency = 12; + let ResourceCycles = [6, 6]; + let NumMicroOps = 10; +} +def : InstRW<[WriteVDPPSY], (instregex "VDPPSYrr")>; + +def WriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPU0]> { + let Latency = 17; + let ResourceCycles = [1, 6, 6]; + let NumMicroOps = 11; +} +def : InstRW<[WriteVDPPSYLd, ReadAfterLd], (instregex "VDPPSYrm")>; + +def Write32: SchedWriteRes<[JSTC]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[Write32], (instregex "VCVTDQ2P(S|D)Yrr")>; +def : InstRW<[Write32], (instregex "VMOVNTP(S|D)Ymr")>; +def : InstRW<[Write32], (instregex "VROUNDYP(S|D)r")>; + +def Write32Ld: SchedWriteRes<[JLAGU, JSTC]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[Write32Ld, ReadAfterLd], (instregex "VCVTDQ2P(S|D)Yrm")>; +def : InstRW<[Write32Ld, ReadAfterLd], (instregex "VROUNDYP(S|D)m")>; + +def WriteVCVTYPD: SchedWriteRes<[JSTC, JFPU01]> { + let Latency = 6; + let ResourceCycles = [2, 4]; +} +def : InstRW<[WriteVCVTYPD], (instregex "VCVTPD2(DQ|PS)Yrr")>; + +def WriteVCVTYPDLd: SchedWriteRes<[JLAGU, JSTC, JFPU01]> { + let Latency = 11; + let ResourceCycles = [1, 2, 2]; +} +def : InstRW<[WriteVCVTYPDLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>; + +def WriteVCVTYPS: SchedWriteRes<[JSTC]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVCVTYPS], (instregex "VCVTPS2DQYrr")>; + +def WriteVCVTYPSLd: SchedWriteRes<[JLAGU, JSTC]> { + let Latency = 11; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVCVTYPSLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>; + +def WriteAVX11: SchedWriteRes<[JFPU01]> { + let ResourceCycles = [2]; +} +def : InstRW<[WriteAVX11], (instregex "VAND(N)?P(S|D)Yrr", "VBLENDP(S|D)Yrri", + "VMOVDDUPYrr", "VMOVS(H|L)DUPYrr", "VMOVUP(D|S)Yrr", + "VORP(S|D)Yrr", "VPERMILP(D|S)Yri", "VSHUFP(D|S)Yrri", + "VXORP(S|D)Yrr")>; + +def WriteAVX11Ld: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 6; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteAVX11Ld, ReadAfterLd], (instregex "VAND(N)?P(S|D)Yrm", + "VMOVDDUPYrm", "VMOVS(H|L)DUPYrr", "VMOVUP(D|S)Ymr", + "VORP(S|D)Yrm", "VPERMILP(D|S)Yrm", "VSHUFP(D|S)Yrmi", + "VXORP(S|D)Yrm")>; + +def WriteVBlendVPY: SchedWriteRes<[JFPU01]> { + let Latency = 3; + let ResourceCycles = [6]; +} +def : InstRW<[WriteVBlendVPY], (instregex "VBLENDVP(S|D)Yrr", "VPERMILP(D|S)Yrr")>; + +def WriteVBlendVPYLd: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 8; + let ResourceCycles = [1, 6]; +} +def : InstRW<[WriteVBlendVPYLd, ReadAfterLd], (instregex "VBLENDVP(S|D)Yrm")>; + +def WriteVBROADCASTY: SchedWriteRes<[JFPU01]> { + let Latency = 1; + let ResourceCycles = [4]; +} +def : InstRW<[WriteVBROADCASTY], (instregex "VBROADCASTS(S|D)Yrr")>; + +def WriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 6; + let ResourceCycles = [1, 4]; +} +def : InstRW<[WriteVBROADCASTYLd, ReadAfterLd], (instregex "VBROADCASTS(S|D)Yrm")>; + +def WriteFPAY22: SchedWriteRes<[JFPU0]> { + let Latency = 2; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>; + +def WriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 7; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>; + +def WriteVHAddSubY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVHAddSubY], (instregex "VH(ADD|SUB)P(D|S)Yrr")>; + +def WriteVHAddSubYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVHAddSubYLd], (instregex "VH(ADD|SUB)P(D|S)Yrm")>; + +def WriteVMaskMovY: SchedWriteRes<[JFPU01]> { + let Latency = 6; + let ResourceCycles = [4]; +} +def : InstRW<[WriteVMaskMovY], (instregex "VMASKMOVP(D|S)Yrm")>; + +def WriteVMaskMovYLd: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 11; + let ResourceCycles = [1, 4]; +} +def : InstRW<[WriteVMaskMovYLd], (instregex "VMASKMOVP(D|S)Ymr")>; + +// TODO: In fact we have latency '2+i'. The +i represents an additional 1 cycle transfer +// operation which moves the floating point result to the integer unit. During this +// additional cycle the floating point unit execution resources are not occupied +// and ALU0 in the integer unit is occupied instead. +def WriteVMOVMSK: SchedWriteRes<[JFPU0]> { + let Latency = 3; +} +def : InstRW<[WriteVMOVMSK], (instregex "VMOVMSKP(D|S)Yrr")>; + +def WriteVTEST: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [1]; +} +def : InstRW<[WriteVTEST], (instregex "VTESTP(S|D)rr", "VPTESTrr")>; + +def WriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteVTESTLd], (instregex "VTESTP(S|D)rm", "VPTESTrm")>; + +// TODO: In fact we have latency '3+i'. The +i represents an additional 1 cycle transfer +// operation which moves the floating point result to the integer unit. During this +// additional cycle the floating point unit execution resources are not occupied +// and ALU0 in the integer unit is occupied instead. +def WriteVTESTY: SchedWriteRes<[JFPU01, JFPU0]> { + let Latency = 4; + let ResourceCycles = [2, 2]; +} +def : InstRW<[WriteVTESTY], (instregex "VTESTP(S|D)Yrr", "VPTESTYrr")>; + +def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> { + let Latency = 9; + let ResourceCycles = [1, 4, 2]; +} +def : InstRW<[WriteVTESTYLd], (instregex "VTESTP(S|D)Yrm", "VPTESTYrm")>; + def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { let Latency = 54; let ResourceCycles = [54]; Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -272,7 +272,7 @@ ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -341,7 +341,7 @@ ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -410,7 +410,7 @@ ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -477,7 +477,7 @@ ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -544,7 +544,7 @@ ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] +; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -601,7 +601,7 @@ ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] +; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] ; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -655,8 +655,8 @@ ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] +; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_blendvpd: @@ -710,8 +710,8 @@ ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] +; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_blendvps: @@ -804,7 +804,7 @@ ; ; BTVER2-LABEL: test_broadcastsd_ymm: ; BTVER2: # BB#0: -; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_broadcastsd_ymm: @@ -896,7 +896,7 @@ ; ; BTVER2-LABEL: test_broadcastss_ymm: ; BTVER2: # BB#0: -; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_broadcastss_ymm: @@ -956,9 +956,9 @@ ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00] +; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cmppd: @@ -1024,9 +1024,9 @@ ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00] +; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cmpps: @@ -1090,8 +1090,8 @@ ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1155,8 +1155,8 @@ ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1281,8 +1281,8 @@ ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00] +; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00] ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1346,7 +1346,7 @@ ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:1.00] ; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvtps2dq: @@ -1509,8 +1509,8 @@ ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00] +; BTVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [17:6.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_dpps: @@ -2024,8 +2024,8 @@ ; ; BTVER2-LABEL: test_maskmovpd_ymm: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 -; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) +; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00] +; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [11:2.00] ; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2150,8 +2150,8 @@ ; ; BTVER2-LABEL: test_maskmovps_ymm: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 -; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) +; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00] +; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [11:2.00] ; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2207,8 +2207,8 @@ ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maxpd: @@ -2262,8 +2262,8 @@ ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maxps: @@ -2317,8 +2317,8 @@ ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_minpd: @@ -2372,8 +2372,8 @@ ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_minps: @@ -2557,8 +2557,8 @@ ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [5:1.00] -; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50] +; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:1.00] +; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2614,7 +2614,7 @@ ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.50] +; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movmskpd: @@ -2666,7 +2666,7 @@ ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.50] +; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movmskps: @@ -2719,7 +2719,7 @@ ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntpd: @@ -2772,7 +2772,7 @@ ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntps: @@ -2831,7 +2831,7 @@ ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [5:1.00] -; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50] +; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2894,7 +2894,7 @@ ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [5:1.00] -; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50] +; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2960,7 +2960,7 @@ ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movupd: @@ -3024,7 +3024,7 @@ ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movups: @@ -3192,7 +3192,7 @@ ; ; BTVER2-LABEL: orpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3259,7 +3259,7 @@ ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3453,7 +3453,7 @@ ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:1.00] -; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50] +; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3579,7 +3579,7 @@ ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:1.00] -; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50] +; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3690,7 +3690,7 @@ ; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # BB#0: -; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00] ; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3800,7 +3800,7 @@ ; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # BB#0: -; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00] ; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3925,8 +3925,8 @@ ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3989,8 +3989,8 @@ ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4117,7 +4117,7 @@ ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50] +; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] ; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -4174,7 +4174,7 @@ ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50] +; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] ; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4483,9 +4483,9 @@ ; BTVER2-LABEL: test_testpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4569,9 +4569,9 @@ ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:2.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:3.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4650,9 +4650,9 @@ ; BTVER2-LABEL: test_testps: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4736,9 +4736,9 @@ ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:2.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00] +; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:3.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -5038,7 +5038,7 @@ ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -5105,7 +5105,7 @@ ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] Index: test/CodeGen/X86/sse41-schedule.ll =================================================================== --- test/CodeGen/X86/sse41-schedule.ll +++ test/CodeGen/X86/sse41-schedule.ll @@ -2863,9 +2863,9 @@ ; ; BTVER2-LABEL: test_ptest: ; BTVER2: # BB#0: -; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: setb %cl # sched: [1:0.50] ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] ; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50]