Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1633,7 +1633,12 @@
     break;
   }
   case ISD::ADD:
-  case ISD::SUB: {
+  case ISD::SUB:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM: {
     APInt SrcUndef, SrcZero;
     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
                                    SrcZero, TLO, Depth + 1))
Index: test/CodeGen/X86/avx-schedule.ll
===================================================================
--- test/CodeGen/X86/avx-schedule.ll
+++ test/CodeGen/X86/avx-schedule.ll
@@ -502,56 +502,48 @@
 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; GENERIC-LABEL: test_blendpd:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_blendpd:
 ; SANDY: # %bb.0:
-; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; SANDY-NEXT: retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendpd:
 ; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; HASWELL-NEXT: retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_blendpd:
 ; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [7:0.50]
 ; BROADWELL-NEXT: retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_blendpd:
 ; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_blendpd:
 ; SKX: # %bb.0:
-; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
 ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_blendpd:
 ; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:2.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_blendpd:
 ; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; ZNVER1-NEXT: retq # sched: [1:0.50]
Index: test/CodeGen/X86/avx512-hadd-hsub.ll
===================================================================
--- test/CodeGen/X86/avx512-hadd-hsub.ll
+++ test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -178,16 +178,16 @@
 define <4 x
double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) { ; KNL-LABEL: fadd_noundef_low: ; KNL: # %bb.0: -; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; KNL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: fadd_noundef_low: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; SKX-NEXT: retq @@ -252,17 +252,15 @@ define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) { ; KNL-LABEL: fadd_noundef_eel: ; KNL: # %bb.0: -; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; KNL-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: fadd_noundef_eel: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -278,18 +276,18 @@ define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) { ; KNL-LABEL: fsub_noundef_ee: ; KNL: # %bb.0: -; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; KNL-NEXT: vsubpd %zmm0, %zmm2, %zmm0 +; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0 +; KNL-NEXT: vbroadcastsd %xmm0, %zmm0 +; KNL-NEXT: vsubpd %zmm1, %zmm0, %zmm0 ; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm0 ; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; KNL-NEXT: retq ; ; SKX-LABEL: fsub_noundef_ee: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; SKX-NEXT: vsubpd %zmm0, %zmm2, %zmm0 +; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0 +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: vsubpd %zmm1, %zmm0, %zmm0 ; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0 ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; SKX-NEXT: vzeroupper Index: test/CodeGen/X86/avx512-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -7210,8 +7210,7 @@ ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; 
X86-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) ; X86-NEXT: movl %ebp, %esp @@ -7226,8 +7225,7 @@ ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7301,8 +7299,7 @@ ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -7318,8 +7315,7 @@ ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7351,7 +7347,7 @@ ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -7368,7 +7364,7 @@ ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq @@ -7407,8 +7403,7 @@ ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) ; X86-NEXT: movl %ebp, %esp @@ -7425,8 +7420,7 @@ ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7513,8 +7507,7 @@ ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -7532,8 +7525,7 @@ ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7570,7 +7562,7 @@ ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] 
+; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -7590,7 +7582,7 @@ ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -2710,7 +2710,8 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movhps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:0.50] ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2718,7 +2719,8 @@ ; ; ATOM-LABEL: test_movhps: ; ATOM: # %bb.0: -; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] +; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:1.00] ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] ; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2726,7 +2728,8 @@ ; ; SLM-LABEL: test_movhps: ; SLM: # %bb.0: -; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] +; SLM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00] +; SLM-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2734,7 +2737,8 @@ ; ; SANDY-SSE-LABEL: test_movhps: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:0.50] ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2742,14 +2746,15 @@ ; ; SANDY-LABEL: test_movhps: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movhps: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:1.00] ; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2757,14 +2762,15 @@ ; ; HASWELL-LABEL: test_movhps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: 
[5:0.50] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_movhps: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:1.00] ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2772,14 +2778,15 @@ ; ; BROADWELL-LABEL: test_movhps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_movhps: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:1.00] ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2787,14 +2794,15 @@ ; ; SKYLAKE-LABEL: test_movhps: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_movhps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:1.00] ; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] @@ -2802,14 +2810,15 @@ ; ; SKX-LABEL: test_movhps: ; SKX: # %bb.0: -; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movhps: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:0.50] ; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50] ; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00] @@ -2817,14 +2826,15 @@ ; ; BTVER2-LABEL: test_movhps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00] ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_movhps: ; ZNVER1-SSE: # %bb.0: -; 
ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] sched: [1:0.25] ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50] ; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50] @@ -2832,7 +2842,7 @@ ; ; ZNVER1-LABEL: test_movhps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50] ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] @@ -2958,119 +2968,119 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movlps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; GENERIC-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlps: ; ATOM: # %bb.0: -; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movlps: ; SLM: # %bb.0: -; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] +; SLM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_movlps: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movlps: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movlps: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_movlps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_movlps: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: 
[1:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_movlps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_movlps: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movlps: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_movlps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlps: ; SKX: # %bb.0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movlps: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] ; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_movlps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_movlps: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_movlps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -4867,7 +4867,7 @@ ; ; SANDY-LABEL: test_movhpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = 
mem[0,0] sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -4881,7 +4881,7 @@ ; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] @@ -4895,7 +4895,7 @@ ; ; BROADWELL-LABEL: test_movhpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] @@ -4909,7 +4909,7 @@ ; ; SKYLAKE-LABEL: test_movhpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] @@ -4923,7 +4923,7 @@ ; ; SKX-LABEL: test_movhpd: ; SKX: # %bb.0: -; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -4937,7 +4937,7 @@ ; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00] ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -4951,7 +4951,7 @@ ; ; ZNVER1-LABEL: test_movhpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50] ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] @@ -4967,119 +4967,119 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movlpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; GENERIC-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlpd: ; ATOM: # %bb.0: -; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] ; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] ; ATOM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movlpd: ; SLM: # %bb.0: -; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] +; SLM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00] ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_movlpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 
+; SANDY-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movlpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movlpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_movlpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_movlpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_movlpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movlpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_movlpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlpd: ; SKX: # %bb.0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movlpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm1 = 
mem[0],zero sched: [5:1.00] ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [2:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_movlpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_movlpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/sse41-schedule.ll =================================================================== --- test/CodeGen/X86/sse41-schedule.ll +++ test/CodeGen/X86/sse41-schedule.ll @@ -21,112 +21,96 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_blendpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_blendpd: ; SLM: # %bb.0: -; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_blendpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_blendpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_blendpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_blendpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: 
[6:0.50] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_blendpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_blendpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_blendpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: -; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_blendpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_blendpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_blendpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining.ll +++ test/CodeGen/X86/vector-shuffle-combining.ll @@ -2700,21 +2700,36 @@ } define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: PR22377: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3] -; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] -; SSE-NEXT: addps %xmm0, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: retq +; SSE2-LABEL: PR22377: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[2,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] +; 
SSE2-NEXT: addps %xmm0, %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: PR22377: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm0, %xmm1 +; SSSE3-NEXT: haddps %xmm0, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: PR22377: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: haddps %xmm0, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1] +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE41-NEXT: retq ; ; AVX-LABEL: PR22377: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3] -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2] -; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] ; AVX-NEXT: retq entry: %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
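
The functional change in TargetLowering.cpp above lets SimplifyDemandedVectorElts look through the floating-point binops (FADD/FSUB/FMUL/FDIV/FREM) the same way it already did for ADD/SUB, which is why the regenerated checks drop dead blends, narrow 512-bit unpacks, and expose horizontal-add patterns. Below is a minimal IR sketch of the kind of pattern this affects; the function is hypothetical (it is not one of the patch's tests, though it mirrors fadd_noundef_eel above): only lane 0 of %sum is demanded, so the shuffles feeding the fadd no longer need to be kept at full width.

; Hypothetical reduced example, assuming an AVX-512 target such as
; llc -mtriple=x86_64-- -mattr=+avx512f; not taken verbatim from the tests.
define double @demanded_lane0(<8 x double> %x, <8 x double> %y) {
  ; Interleave the even and odd lanes of %x and %y (unpcklpd/unpckhpd-style masks).
  %lo = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %hi = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %sum = fadd <8 x double> %lo, %hi
  ; Only lane 0 of the sum is extracted, so only lane 0 of each shuffle is demanded.
  %elt = extractelement <8 x double> %sum, i32 0
  ret double %elt
}

With FADD now handled by the demanded-elements walk, a function like this should no longer require the pair of full-width vunpcklpd/vunpckhpd instructions, matching the fadd_noundef_eel diff above.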