Index: lib/Target/X86/X86ScheduleBtVer2.td
===================================================================
--- lib/Target/X86/X86ScheduleBtVer2.td
+++ lib/Target/X86/X86ScheduleBtVer2.td
@@ -535,6 +535,32 @@
 }
 def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>;
 
+// 128-bit/scalar double-precision multiply, register form: JFPU1 busy for 2 cycles.
+def WriteVMULPD: SchedWriteRes<[JFPU1]> {
+  let Latency = 4;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteVMULPD], (instregex "VMULPDrr", "VMULSDrr")>;
+
+// Load-folded form: adds one JLAGU cycle; the register source uses ReadAfterLd.
+def WriteVMULPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 9;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteVMULPDLd, ReadAfterLd], (instregex "VMULPDrm", "VMULSDrm")>;
+
+// 128-bit/scalar single-precision multiply, register form.
+def WriteVMULPS: SchedWriteRes<[JFPU1]> {
+  let Latency = 2;
+}
+def : InstRW<[WriteVMULPS], (instregex "VMULPSrr", "VMULSSrr")>;
+
+// Load-folded single-precision multiply; the register source uses ReadAfterLd.
+def WriteVMULPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 7;
+}
+def : InstRW<[WriteVMULPSLd, ReadAfterLd], (instregex "VMULPSrm", "VMULSSrm")>;
+
 def WriteVCVTY: SchedWriteRes<[JSTC]> {
   let Latency = 3;
   let ResourceCycles = [2];
@@ -553,12 +579,61 @@
 def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>;
 def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTTPS2DQYrm")>;
 
-def WriteVMONTPSt: SchedWriteRes<[JSTC, JLAGU]> {
+// Non-temporal integer store (non-Y VMOVNTDQ).
+def WriteVMOVNTDQSt: SchedWriteRes<[JSTC, JLAGU]> {
+  let Latency = 2;
+}
+def : InstRW<[WriteVMOVNTDQSt], (instregex "VMOVNTDQmr")>;
+
+// Non-temporal FP stores (non-Y VMOVNTPS/VMOVNTPD).
+def WriteVMOVNTPSt: SchedWriteRes<[JSTC, JLAGU]> {
+  let Latency = 3;
+}
+def : InstRW<[WriteVMOVNTPSt], (instregex "VMOVNTP(S|D)mr")>;
+
+// Y-suffixed non-temporal stores: JSTC is held for 2 cycles.
+def WriteVMOVNTPYSt: SchedWriteRes<[JSTC, JLAGU]> {
   let Latency = 3;
   let ResourceCycles = [2,1];
 }
-def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTP(S|D)Ymr")>;
-def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTDQYmr")>;
+def : InstRW<[WriteVMOVNTPYSt], (instregex "VMOVNTP(S|D)Ymr")>;
+def : InstRW<[WriteVMOVNTPYSt], (instregex "VMOVNTDQYmr")>;
+
+// NOTE(review): the *mr (store) opcodes below are bound to JFPU01 only, with no
+// store-side resource; tests now expect store throughput 0.50 instead of 1.00 - confirm.
+def WriteVMOV: SchedWriteRes<[JFPU01]> {
+  let ResourceCycles = [1];
+}
+def : InstRW<[WriteVMOV], (instregex "VMOVS(S|D)rr", "VMOVS(S|D)mr")>;
+def : InstRW<[WriteVMOV], (instregex "VMOVUPDrr", "VMOVUPDmr")>;
+def : InstRW<[WriteVMOV], (instregex "VMOVAPDrr", "VMOVAPDmr")>;
+def : InstRW<[WriteVMOV], (instregex "VMOVDQArr", "VMOVDQAmr")>;
+def : InstRW<[WriteVMOV], (instregex "VMOVDQUrr", "VMOVDQUmr")>;
+
+// Plain vector/scalar loads: no register source, so no ReadAfterLd is listed.
+def WriteVMOVLd: SchedWriteRes<[JFPU01, JLAGU]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVMOVLd], (instregex "VMOVS(S|D)rm")>;
+def : InstRW<[WriteVMOVLd], (instregex "VMOVUPDrm")>;
+def : InstRW<[WriteVMOVLd], (instregex "VMOVAPDrm")>;
+def : InstRW<[WriteVMOVLd], (instregex "VMOVDQArm")>;
+def : InstRW<[WriteVMOVLd], (instregex "VMOVDQUrm")>;
+
+// 128-bit/scalar FP max/min, register form.
+def WriteVMAXMIN: SchedWriteRes<[JFPU0]> {
+  let Latency = 2;
+}
+def : InstRW<[WriteVMAXMIN], (instregex "VMAXP(D|S)rr", "VMAXS(D|S)rr")>;
+def : InstRW<[WriteVMAXMIN], (instregex "VMINP(D|S)rr", "VMINS(D|S)rr")>;
+
+// Load-folded FP max/min; the register source uses ReadAfterLd.
+def WriteVMAXMINLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 7;
+}
+def : InstRW<[WriteVMAXMINLd, ReadAfterLd], (instregex "VMAXP(D|S)rm", "VMAXS(D|S)rm")>;
+def : InstRW<[WriteVMAXMINLd, ReadAfterLd], (instregex "VMINP(D|S)rm", "VMINS(D|S)rm")>;
 
 def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> {
   let Latency = 6;
@@ -604,6 +679,18 @@
 }
 def : InstRW<[WriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
 
+// 128-bit/scalar FP compares, register form.
+def WriteVCMPcc: SchedWriteRes<[JFPU0]> {
+  let Latency = 2;
+}
+def : InstRW<[WriteVCMPcc], (instregex "VCMPP(S|D)rri", "VCMPS(S|D)rri")>;
+
+// Load-folded FP compares; the register source uses ReadAfterLd.
+def WriteVCMPccLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 7;
+}
+def : InstRW<[WriteVCMPccLd, ReadAfterLd], (instregex "VCMPP(S|D)rmi", "VCMPS(S|D)rmi")>;
+
 def WriteVHAddSubY: SchedWriteRes<[JFPU0]> {
   let Latency = 3;
   let ResourceCycles = [2];
Index: test/CodeGen/X86/mmx-schedule.ll
===================================================================
--- test/CodeGen/X86/mmx-schedule.ll
+++ 
test/CodeGen/X86/mmx-schedule.ll @@ -750,7 +750,7 @@ ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovd %edi, %xmm0 # sched: [1:0.17] ; BTVER2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [5:1.00] ; BTVER2-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; BTVER2-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:1.00] Index: test/CodeGen/X86/recip-fastmath.ll =================================================================== --- test/CodeGen/X86/recip-fastmath.ll +++ test/CodeGen/X86/recip-fastmath.ll @@ -39,7 +39,7 @@ ; ; BTVER2-LABEL: f32_no_estimate: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -107,7 +107,7 @@ ; ; BTVER2-LABEL: f32_one_step: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] @@ -204,7 +204,7 @@ ; ; BTVER2-LABEL: f32_two_step: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00] ; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] Index: test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- 
test/CodeGen/X86/recip-fastmath2.ll +++ test/CodeGen/X86/recip-fastmath2.ll @@ -103,7 +103,7 @@ ; ; BTVER2-LABEL: f32_one_step_2: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] @@ -198,7 +198,7 @@ ; ; BTVER2-LABEL: f32_one_step_2_divs: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] @@ -311,7 +311,7 @@ ; ; BTVER2-LABEL: f32_two_step_2: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00] ; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -353,8 +353,8 @@ ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1311,8 +1311,8 @@ ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaxps 
%xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maxps: @@ -1378,8 +1378,8 @@ ; ; BTVER2-LABEL: test_maxss: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maxss: @@ -1445,8 +1445,8 @@ ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_minps: @@ -1512,8 +1512,8 @@ ; ; BTVER2-LABEL: test_minss: ; BTVER2: # BB#0: -; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_minss: @@ -2003,7 +2003,7 @@ ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntps: @@ -2073,9 +2073,9 @@ ; ; BTVER2-LABEL: test_movss_mem: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; 
BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movss_mem: @@ -2642,7 +2642,7 @@ ; ; BTVER2-LABEL: test_rcpss: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00] ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2814,7 +2814,7 @@ ; ; BTVER2-LABEL: test_rsqrtss: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00] ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -428,8 +428,8 @@ ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1369,7 +1369,7 @@ ; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] +; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:1.00] ; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ 
-1613,7 +1613,7 @@ ; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00] ; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2309,8 +2309,8 @@ ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maxpd: @@ -2376,8 +2376,8 @@ ; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maxsd: @@ -2443,8 +2443,8 @@ ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_minpd: @@ -2510,8 +2510,8 @@ ; ; BTVER2-LABEL: test_minsd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_minsd: @@ -2585,9 +2585,9 @@ ; ; BTVER2-LABEL: test_movapd: 
; BTVER2: # BB#0: -; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movapd: @@ -2661,9 +2661,9 @@ ; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movdqa: @@ -2737,9 +2737,9 @@ ; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movdqu: @@ -3245,7 +3245,7 @@ ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntdqa: @@ -3310,7 +3310,7 @@ ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntpd: @@ -3527,9 +3527,9 @@ ; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] +; 
BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:1.00] ; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movsd_mem: @@ -3664,9 +3664,9 @@ ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movupd: @@ -3732,8 +3732,8 @@ ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_mulpd: @@ -3798,8 +3798,8 @@ ; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_mulsd: @@ -8904,7 +8904,7 @@ ; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] +; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:1.00] ; BTVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [26:21.00] ; BTVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [26:21.00] ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]