Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -649,7 +649,7 @@ let hasSideEffects = 0 in def rr : PI, - Sched<[WriteFShuffle]>; + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def rm : PI, FoldGenData<"MOVAPSrr">; Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -2103,7 +2103,7 @@ ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:0.50] ; ZNVER1-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:0.50] -; ZNVER1-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50] +; ZNVER1-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %a1) call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) @@ -2166,7 +2166,7 @@ ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:1.00] ; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50] +; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1) call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %a1, <4 x double> %a2) @@ -2229,7 +2229,7 @@ ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:0.50] ; ZNVER1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:0.50] -; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50] +; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %a1) call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) @@ -2292,7 +2292,7 @@ ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:1.00] ; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50] +; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1) call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %a1, <8 x float> %a2) Index: test/CodeGen/X86/sha-schedule.ll =================================================================== --- test/CodeGen/X86/sha-schedule.ll +++ test/CodeGen/X86/sha-schedule.ll @@ -210,11 +210,11 @@ ; ; GOLDMONT-LABEL: test_sha256rnds2: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; GOLDMONT-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; GOLDMONT-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] +; GOLDMONT-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] ; GOLDMONT-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00] ; GOLDMONT-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00] -; GOLDMONT-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; GOLDMONT-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha256rnds2: @@ -228,11 +228,11 @@ ; ; ZNVER1-LABEL: test_sha256rnds2: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.50] -; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50] +; ZNVER1-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25] ; ZNVER1-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00] ; ZNVER1-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00] -; ZNVER1-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.50] +; ZNVER1-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = load <4 x i32>, <4 x i32>* %a3 %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -2557,7 +2557,7 @@ ; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00] ; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_rcpps: @@ -2729,7 +2729,7 @@ ; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00] ; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_rsqrtps: @@ -3038,7 +3038,7 @@ ; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00] ; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_sqrtps: Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -3564,7 +3564,7 @@ ; SLM-LABEL: test_movsd_reg: ; SLM: # %bb.0: ; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_movsd_reg: @@ -8756,7 +8756,7 @@ ; SLM-NEXT: sqrtpd (%rdi), %xmm1 # sched: [18:1.00] ; SLM-NEXT: sqrtpd %xmm0, %xmm0 # sched: [15:1.00] ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_sqrtpd: @@ -9284,10 +9284,10 @@ ; SLM-LABEL: test_unpcklpd: ; SLM: # %bb.0: ; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] ; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_unpcklpd: Index: test/CodeGen/X86/sse41-schedule.ll =================================================================== --- test/CodeGen/X86/sse41-schedule.ll +++ test/CodeGen/X86/sse41-schedule.ll @@ -163,11 +163,11 @@ ; ; SLM-LABEL: test_blendvpd: ; SLM: # %bb.0: -; SLM-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] -; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] +; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:1.00] ; SLM-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [4:1.00] -; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_blendvpd: @@ -230,11 +230,11 @@ ; ; SLM-LABEL: test_blendvps: ; SLM: # %bb.0: -; SLM-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] +; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [1:1.00] ; SLM-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [4:1.00] -; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_blendvps: @@ -717,7 +717,7 @@ ; SLM-LABEL: test_pblendvb: ; SLM: # %bb.0: ; SLM-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] -; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] ; SLM-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [4:1.00] ; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] @@ -2991,7 +2991,7 @@ ; SLM-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [6:1.00] ; SLM-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [3:1.00] ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_roundpd: @@ -3063,7 +3063,7 @@ ; SLM-NEXT: roundps $7, (%rdi), %xmm1 # sched: [6:1.00] ; SLM-NEXT: roundps $7, %xmm0, %xmm0 # sched: [3:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_roundps: @@ -3133,7 +3133,7 @@ ; ; SLM-LABEL: test_roundsd: ; SLM: # %bb.0: -; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] ; SLM-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] ; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] @@ -3206,7 +3206,7 @@ ; ; SLM-LABEL: test_roundss: ; SLM: # %bb.0: -; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] ; SLM-NEXT: roundss $7, (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] ; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]