Index: lib/Target/X86/X86ScheduleBtVer2.td
===================================================================
--- lib/Target/X86/X86ScheduleBtVer2.td
+++ lib/Target/X86/X86ScheduleBtVer2.td
@@ -559,6 +559,28 @@
 }
 def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>;
 
+def WriteVMULPD: SchedWriteRes<[JFPU1]> {  // Override for the VMULPD/VMULSD register-register forms matched below.
+  let Latency = 4;
+  let ResourceCycles = [2];  // Cycles charged to JFPU1, the only listed resource.
+}
+def : InstRW<[WriteVMULPD], (instregex "VMULPDrr", "VMULSDrr")>;
+
+def WriteVMULPDLd: SchedWriteRes<[JLAGU, JFPU1]> {  // Override for VMULPD/VMULSD with a folded memory operand.
+  let Latency = 9;
+  let ResourceCycles = [1, 2];  // Cycles per resource, in the order [JLAGU, JFPU1].
+}  // ReadAfterLd is restated below: InstRW supplies the whole read/write list, as the existing Y-form "rm" overrides do.
+def : InstRW<[WriteVMULPDLd, ReadAfterLd], (instregex "VMULPDrm", "VMULSDrm")>;
+
+def WriteVMULPS: SchedWriteRes<[JFPU1]> {  // Override for the VMULPS/VMULSS register-register forms matched below.
+  let Latency = 2;  // No ResourceCycles override is given for JFPU1 here.
+}
+def : InstRW<[WriteVMULPS], (instregex "VMULPSrr", "VMULSSrr")>;
+
+def WriteVMULPSLd: SchedWriteRes<[JLAGU, JFPU1]> {  // Override for VMULPS/VMULSS with a folded memory operand.
+  let Latency = 7;  // No ResourceCycles override is given for JLAGU/JFPU1 here.
+}  // ReadAfterLd is restated below: InstRW supplies the whole read/write list, as the existing Y-form "rm" overrides do.
+def : InstRW<[WriteVMULPSLd, ReadAfterLd], (instregex "VMULPSrm", "VMULSSrm")>;
+
 def WriteVCVTY: SchedWriteRes<[JSTC]> {
   let Latency = 3;
   let ResourceCycles = [2];
@@ -577,12 +599,62 @@
 def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>;
 def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTTPS2DQYrm")>;
 
-def WriteVMONTPSt: SchedWriteRes<[JSTC, JLAGU]> {
+def WriteVMOVTDQSt: SchedWriteRes<[JSTC, JSAGU]> {  // Override for the VMOVNTDQmr store matched below.
+  let Latency = 2;  // NOTE(review): the def name spells "VMOVT" while the instruction is VMOVNT* - confirm the intended name.
+}
+def : InstRW<[WriteVMOVTDQSt], (instregex "VMOVNTDQmr")>;
+
+def WriteVMOVTPSt: SchedWriteRes<[JSTC, JSAGU]> {  // Override for the VMOVNTPSmr/VMOVNTPDmr stores matched below.
+  let Latency = 3;  // NOTE(review): the def name spells "VMOVT" while the instructions are VMOVNT* - confirm the intended name.
+}
+def : InstRW<[WriteVMOVTPSt], (instregex "VMOVNTP(S|D)mr")>;
+
+def WriteVMONTPYSt: SchedWriteRes<[JSTC, JSAGU]> {  // Y-suffixed VMOVNT* stores. NOTE(review): "VMONT" looks like a typo for "VMOVNT" - confirm.
   let Latency = 3;
   let ResourceCycles = [2,1];
 }
-def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTP(S|D)Ymr")>;
-def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTDQYmr")>;
+def : InstRW<[WriteVMONTPYSt], (instregex "VMOVNTP(S|D)Ymr")>;  // Uses the renamed Y-form write.
+def : InstRW<[WriteVMONTPYSt], (instregex "VMOVNTDQYmr")>;
+
+def WriteVecMove: SchedWriteRes<[JFPU01]> {}  // Register-to-register vector moves; no Latency/ResourceCycles override.
+def WriteVecStore: SchedWriteRes<[JFPU01, JSAGU]> {}  // Vector stores; no Latency/ResourceCycles override.
+def WriteVecLoad: SchedWriteRes<[JLAGU, JFPU01]> {  // Vector loads.
+  let Latency = 6;
+}
+
+def : InstRW<[WriteVecMove], (instregex "VMOVS(S|D)rr")>;  // Map the "rr" move forms onto WriteVecMove.
+def : InstRW<[WriteVecMove], (instregex "VMOVUPDrr")>;  // NOTE(review): VMOVUPSrr/VMOVAPSrr are not listed - confirm this is intentional.
+def : InstRW<[WriteVecMove], (instregex "VMOVAPDrr")>;
+def : InstRW<[WriteVecMove], (instregex "VMOVDQArr")>;
+def : InstRW<[WriteVecMove], (instregex "VMOVDQUrr")>;
+
+def : InstRW<[WriteVecStore], (instregex "VMOVS(S|D)mr")>;  // Map the "mr" (store) move forms onto WriteVecStore.
+def : InstRW<[WriteVecStore], (instregex "VMOVUPDmr")>;  // NOTE(review): VMOVUPSmr/VMOVAPSmr are not listed - confirm this is intentional.
+def : InstRW<[WriteVecStore], (instregex "VMOVAPDmr")>;
+def : InstRW<[WriteVecStore], (instregex "VMOVDQAmr")>;
+def : InstRW<[WriteVecStore], (instregex "VMOVDQUmr")>;
+
+def : InstRW<[WriteVecLoad], (instregex "VMOVS(S|D)rm")>;  // Map the "rm" (load) move forms onto WriteVecLoad.
+def : InstRW<[WriteVecLoad], (instregex "VMOVUPDrm")>;  // NOTE(review): VMOVUPSrm/VMOVAPSrm are not listed - confirm this is intentional.
+def : InstRW<[WriteVecLoad], (instregex "VMOVAPDrm")>;
+def : InstRW<[WriteVecLoad], (instregex "VMOVDQArm")>;
+def : InstRW<[WriteVecLoad], (instregex "VMOVDQUrm")>;
+
+def WriteFCmp: SchedWriteRes<[JFPU0]> {  // Override for the FP max/min/compare register forms matched below.
+  let Latency = 2;  // No ResourceCycles override is given for JFPU0 here.
+}
+
+def : InstRW<[WriteFCmp], (instregex "VMAXP(D|S)rr", "VMAXS(D|S)rr")>;  // Packed and scalar VMAX, register source.
+def : InstRW<[WriteFCmp], (instregex "VMINP(D|S)rr", "VMINS(D|S)rr")>;  // Packed and scalar VMIN, register source.
+def : InstRW<[WriteFCmp], (instregex "VCMPP(S|D)rri", "VCMPS(S|D)rri")>;  // Packed and scalar VCMP, register source plus immediate.
+
+def WriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0]> {  // Override for FP max/min/compare with a folded memory operand.
+  let Latency = 7;  // No ResourceCycles override is given for JLAGU/JFPU0 here.
+}  // ReadAfterLd is restated below: InstRW supplies the whole read/write list, as the existing Y-form "rm" overrides do.
+
+def : InstRW<[WriteFCmpLd, ReadAfterLd], (instregex "VMAXP(D|S)rm", "VMAXS(D|S)rm")>;
+def : InstRW<[WriteFCmpLd, ReadAfterLd], (instregex "VMINP(D|S)rm", "VMINS(D|S)rm")>;
+def : InstRW<[WriteFCmpLd, ReadAfterLd], (instregex "VCMPP(S|D)rmi", "VCMPS(S|D)rmi")>;
 
 def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> {
   let Latency = 6;
Index: test/CodeGen/X86/mmx-schedule.ll
===================================================================
--- test/CodeGen/X86/mmx-schedule.ll
+++ test/CodeGen/X86/mmx-schedule.ll
@@ -750,7 +750,7 @@
 ; BTVER2:       # BB#0:
 ; BTVER2-NEXT:    vmovd %edi, %xmm0 # sched: [1:0.17]
 ; BTVER2-NEXT:    vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    movq -{{[0-9]+}}(%rsp), %mm1 # sched: [5:1.00]
 ; BTVER2-NEXT:    vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; BTVER2-NEXT:    paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:1.00]
Index: test/CodeGen/X86/recip-fastmath.ll
===================================================================
--- test/CodeGen/X86/recip-fastmath.ll
+++ test/CodeGen/X86/recip-fastmath.ll
@@ -39,7 +39,7 @@
 ;
 ; BTVER2-LABEL: f32_no_estimate:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
@@ -107,7 +107,7 @@
 ;
 ; BTVER2-LABEL: f32_one_step:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
@@ -204,7 +204,7 @@
 ;
 ; BTVER2-LABEL: f32_two_step:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
 ; BTVER2-NEXT:    vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
Index: test/CodeGen/X86/recip-fastmath2.ll
===================================================================
--- test/CodeGen/X86/recip-fastmath2.ll
+++ test/CodeGen/X86/recip-fastmath2.ll
@@ -103,7 +103,7 @@
 ;
 ; BTVER2-LABEL: f32_one_step_2:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
@@ -198,7 +198,7 @@
 ;
 ; BTVER2-LABEL: f32_one_step_2_divs:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
@@ -311,7 +311,7 @@
 ;
 ; BTVER2-LABEL: f32_two_step_2:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
 ; BTVER2-NEXT:    vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
Index: test/CodeGen/X86/sse-schedule.ll
===================================================================
--- test/CodeGen/X86/sse-schedule.ll
+++ test/CodeGen/X86/sse-schedule.ll
@@ -353,8 +353,8 @@
 ;
 ; BTVER2-LABEL: test_cmpps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
@@ -1311,8 +1311,8 @@
 ;
 ; BTVER2-LABEL: test_maxps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_maxps:
@@ -1378,8 +1378,8 @@
 ;
 ; BTVER2-LABEL: test_maxss:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_maxss:
@@ -1445,8 +1445,8 @@
 ;
 ; BTVER2-LABEL: test_minps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_minps:
@@ -1512,8 +1512,8 @@
 ;
 ; BTVER2-LABEL: test_minss:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_minss:
@@ -2003,7 +2003,7 @@
 ;
 ; BTVER2-LABEL: test_movntps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT:    vmovntps %xmm0, (%rdi) # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movntps:
@@ -2073,7 +2073,7 @@
 ;
 ; BTVER2-LABEL: test_movss_mem:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -2642,7 +2642,7 @@
 ;
 ; BTVER2-LABEL: test_rcpss:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2814,7 +2814,7 @@
 ;
 ; BTVER2-LABEL: test_rsqrtss:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
Index: test/CodeGen/X86/sse2-schedule.ll
===================================================================
--- test/CodeGen/X86/sse2-schedule.ll
+++ test/CodeGen/X86/sse2-schedule.ll
@@ -428,8 +428,8 @@
 ;
 ; BTVER2-LABEL: test_cmppd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
@@ -1369,7 +1369,7 @@
 ;
 ; BTVER2-LABEL: test_cvtsd2ss:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:1.00]
 ; BTVER2-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1613,7 +1613,7 @@
 ;
 ; BTVER2-LABEL: test_cvtss2sd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:1.00]
 ; BTVER2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2309,8 +2309,8 @@
 ;
 ; BTVER2-LABEL: test_maxpd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_maxpd:
@@ -2376,8 +2376,8 @@
 ;
 ; BTVER2-LABEL: test_maxsd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_maxsd:
@@ -2443,8 +2443,8 @@
 ;
 ; BTVER2-LABEL: test_minpd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_minpd:
@@ -2510,8 +2510,8 @@
 ;
 ; BTVER2-LABEL: test_minsd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_minsd:
@@ -2585,7 +2585,7 @@
 ;
 ; BTVER2-LABEL: test_movapd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -2661,7 +2661,7 @@
 ;
 ; BTVER2-LABEL: test_movdqa:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -2737,7 +2737,7 @@
 ;
 ; BTVER2-LABEL: test_movdqu:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -3245,7 +3245,7 @@
 ; BTVER2-LABEL: test_movntdqa:
 ; BTVER2:       # BB#0:
 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movntdqa:
@@ -3310,7 +3310,7 @@
 ; BTVER2-LABEL: test_movntpd:
 ; BTVER2:       # BB#0:
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movntpd:
@@ -3527,7 +3527,7 @@
 ;
 ; BTVER2-LABEL: test_movsd_mem:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:1.00]
 ; BTVER2-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -3664,7 +3664,7 @@
 ;
 ; BTVER2-LABEL: test_movupd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -3732,8 +3732,8 @@
 ;
 ; BTVER2-LABEL: test_mulpd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
+; BTVER2-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_mulpd:
@@ -3798,8 +3798,8 @@
 ;
 ; BTVER2-LABEL: test_mulsd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
+; BTVER2-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_mulsd:
@@ -8904,7 +8904,7 @@
 ;
 ; BTVER2-LABEL: test_sqrtsd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:1.00]
 ; BTVER2-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
 ; BTVER2-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]