Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1633,7 +1633,12 @@ break; } case ISD::ADD: - case ISD::SUB: { + case ISD::SUB: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: { APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -502,56 +502,48 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; GENERIC-LABEL: test_blendpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_blendpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_blendpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [7:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: -; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_blendpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/avx512-hadd-hsub.ll =================================================================== --- test/CodeGen/X86/avx512-hadd-hsub.ll +++ test/CodeGen/X86/avx512-hadd-hsub.ll @@ -178,16 +178,16 @@ define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) { ; KNL-LABEL: fadd_noundef_low: ; KNL: # %bb.0: -; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; KNL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: fadd_noundef_low: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; SKX-NEXT: retq @@ -252,17 +252,15 @@ define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) { ; KNL-LABEL: fadd_noundef_eel: ; KNL: # %bb.0: -; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; KNL-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: fadd_noundef_eel: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; SKX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -278,18 +276,18 @@ define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) { ; KNL-LABEL: fsub_noundef_ee: ; KNL: # %bb.0: -; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; KNL-NEXT: vsubpd %zmm0, %zmm2, %zmm0 +; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0 +; KNL-NEXT: vbroadcastsd %xmm0, %zmm0 +; KNL-NEXT: vsubpd %zmm1, %zmm0, %zmm0 ; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm0 ; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; KNL-NEXT: retq ; ; SKX-LABEL: fsub_noundef_ee: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; SKX-NEXT: vsubpd %zmm0, %zmm2, %zmm0 +; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0 +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: vsubpd %zmm1, %zmm0, %zmm0 ; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0 ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; SKX-NEXT: vzeroupper Index: test/CodeGen/X86/avx512-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -7210,8 +7210,7 @@ ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) ; X86-NEXT: movl %ebp, %esp @@ -7226,8 +7225,7 @@ ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7301,8 +7299,7 @@ ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -7318,8 +7315,7 @@ ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7351,7 +7347,7 @@ ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -7368,7 +7364,7 @@ ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq @@ -7407,8 +7403,7 @@ ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) ; X86-NEXT: movl %ebp, %esp @@ -7425,8 +7420,7 @@ ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7513,8 +7507,7 @@ ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -7532,8 +7525,7 @@ ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -7570,7 +7562,7 @@ ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -7590,7 +7582,7 @@ ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] +; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq Index: test/CodeGen/X86/sse41-schedule.ll =================================================================== --- test/CodeGen/X86/sse41-schedule.ll +++ test/CodeGen/X86/sse41-schedule.ll @@ -21,112 +21,96 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_blendpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_blendpd: ; SLM: # %bb.0: -; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_blendpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_blendpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_blendpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_blendpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_blendpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_blendpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_blendpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: -; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_blendpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_blendpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_blendpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining.ll +++ test/CodeGen/X86/vector-shuffle-combining.ll @@ -2700,21 +2700,36 @@ } define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: PR22377: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3] -; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] -; SSE-NEXT: addps %xmm0, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: retq +; SSE2-LABEL: PR22377: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[2,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] +; SSE2-NEXT: addps %xmm0, %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: PR22377: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm0, %xmm1 +; SSSE3-NEXT: haddps %xmm0, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: PR22377: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movaps %xmm0, %xmm1 +; SSE41-NEXT: haddps %xmm0, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1] +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE41-NEXT: retq ; ; AVX-LABEL: PR22377: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3] -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2] -; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm1 +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] ; AVX-NEXT: retq entry: %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>